xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision dbf0e21dc9b62c19f2224f20451fa79066c7e006)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "../src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* only rank 0 holds the global matrix; it must be of type SeqAIJ */
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create the parallel matrix: each process contributes m local rows (and columns) */
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    /* gather each rank's row count, then prefix-sum so rowners[] holds the ownership ranges */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
	for (j=0; j<dlens[i]; j++) {
          /* ld[i] counts entries strictly left of the diagonal block; saved for the reuse path below */
          if (gmata->j[jj] < rstart) ld[i]++;
	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
	  jj++;
	}
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0's own rows are the first ones of gmat; reference them in place, no copy */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
	for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
	  jj++;
	}
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens currently holds total row lengths, so subtract olens
       to get diagonal-block lengths; both Seq and MPI preallocations are set since
       MATAIJ resolves to either depending on the communicator size */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to total row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    /* non-root ranks allocated their receive buffers; rank 0 aliased gmat's arrays */
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    /* stash ld so MAT_REUSE_MATRIX can split incoming values between A and B without the column indices */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat   = *inmat;
    ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       each row of gmataa is laid out [left-of-diagonal B | A | right-of-diagonal B],
       with ld[] giving the left-of-diagonal count per row */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right-of-diagonal tail of row i-1 plus left-of-diagonal head of row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* right-of-diagonal tail of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
a slightly higher hash-table lookup cost; without it, it is not scalable (each
processor holds an order-N integer array) but is fast to access.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
#define CHUNKSIZE   15
/*
   MatSetValues_SeqAIJ_A_Private - inline insertion of one (row,col,value) entry into
   the "diagonal" SeqAIJ block A of an MPIAIJ matrix.

   Expects the caller (MatSetValues_MPIAIJ) to have set up: rp1/ap1 (column/value
   arrays of the row), nrow1/rmax1 (current/allocated row length), low1/high1/lastcol1
   (binary-search window cached across calls), plus a, aa, ai, aj, aimax, ailen, am,
   nonew, ignorezeroentries, and scratch t, _i, ii, N.

   Binary-searches the row for col; on a hit it adds or overwrites per addv, otherwise
   it inserts (reallocating via MatSeqXAIJReallocateAIJ if the row is full), shifting
   later entries up.  Jumps to the a_noinsert label to skip insertion.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
240 
241 
/*
   MatSetValues_SeqAIJ_B_Private - exact analogue of MatSetValues_SeqAIJ_A_Private,
   operating on the "off-diagonal" SeqAIJ block B (variables rp2/ap2, nrow2, rmax2,
   low2/high2/lastcol2, b, ba, bi, bj, bimax, bilen, bm; label b_noinsert).
   See the comment on the A variant above for the algorithm.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
	if (addv == ADD_VALUES) ap2[_i] += value;     \
	else                    ap2[_i] = value;      \
	goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
    Sets an entire locally owned row of the matrix at once.  v[] must contain the
  values of ALL stored entries of the row, ordered by global column index, i.e.
  [off-diagonal entries left of the diagonal block | diagonal-block entries |
   off-diagonal entries right of the diagonal block].
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;   /* convert global row to local row */
  /* l = number of B (off-diagonal) entries whose global column is left of the owned block;
     garray[] translates B's local column indices back to global */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
/*
    Inserts or adds a logically dense block of values.  Entries in locally owned
  rows go directly into the diagonal (A) or off-diagonal (B) sequential block via
  the MatSetValues_SeqAIJ_{A,B}_Private macros; entries for rows owned by other
  processes are stashed and communicated during assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: cache the row's search state for both A and B blocks,
         as required by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        /* v == NULL means insert explicit zeros; roworiented selects row- vs column-major v layout */
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          /* column in the diagonal block */
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          /* column in the off-diagonal block */
          if (mat->was_assembled) {
            /* after assembly B uses compacted local column indices; translate via colmap */
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
	    col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* new off-diagonal nonzero in an assembled matrix: expand B back to
                 global column indexing so it can accept the new entry */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* row owned by another process: stash the values for assembly-time communication */
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
415   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are either in INSERTMODE or ADDMODE */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
    Completes assembly: drains the stash into local storage, assembles the A and B
  blocks, handles disassembly consensus across ranks, and on the first final
  assembly builds the scatter needed for matrix-vector products.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive stashed off-process contributions and insert them locally */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use     = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled is true only if every rank is still assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: compact B's columns and build the lvec scatter */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* discard any row-value workspace cached by MatGetRow */
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of number of messages and max length*/
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal allocating
674      new memory, thus calling lots of mallocs and slowing things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
    MatMultTranspose_MPIAIJ - Computes yy = A^T xx.

    Forms yy = Ad^T xx plus a reverse (add) scatter of Ao^T xx, where Ad
    (a->A) is the diagonal block and Ao (a->B) the off-diagonal block.
    The order of the local product relative to the scatter depends on
    whether the scatter's Begin/End are "merged" (i.e. communication is
    completed inside VecScatterBegin()).
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part: lvec = Ao^T * xx */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* merged scatter adds during Begin(), so the local part must come first */
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
    MatDestroy_MPIAIJ - Releases all storage owned by a parallel AIJ matrix:
    the two sequential blocks, the column map, the ghost vector and scatter
    context, the row-access work arrays, and the composed function hooks.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* destroy the diagonal (A) and off-diagonal (B) sequential blocks */
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
  /* colmap is a hash table when PETSC_USE_CTABLE, otherwise a plain array */
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  /* lvec/Mvctx may never have been created if the matrix was not used in a product */
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* remove the type-specific methods that were composed on the object at creation */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap->N;
932     header[2] = mat->cmap->N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap->n;
938     range = mat->rmap->range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap->n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap->n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
968   nzmax = nz; /* )th processor needs space a largest processor needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap->n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap->n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap->N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 512 rows, use binary format instead");
1114 
1115     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1116     if (!rank) {
1117       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1118     } else {
1119       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1120     }
1121     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1122     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1123     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1124     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1125 
1126     /* copy over the A part */
1127     Aloc = (Mat_SeqAIJ*)aij->A->data;
1128     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1129     row = mat->rmap->rstart;
1130     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1131     for (i=0; i<m; i++) {
1132       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1133       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1134     }
1135     aj = Aloc->j;
1136     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1137 
1138     /* copy over the B part */
1139     Aloc = (Mat_SeqAIJ*)aij->B->data;
1140     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1141     row  = mat->rmap->rstart;
1142     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1143     ct   = cols;
1144     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1148     }
1149     ierr = PetscFree(ct);CHKERRQ(ierr);
1150     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1151     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1152     /*
1153        Everyone has to call to draw the matrix since the graphics waits are
1154        synchronized across all processors that share the PetscDraw object
1155     */
1156     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1157     if (!rank) {
1158       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1159       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1160     }
1161     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1162     ierr = MatDestroy(A);CHKERRQ(ierr);
1163   }
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatView_MPIAIJ"
1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1170 {
1171   PetscErrorCode ierr;
1172   PetscTruth     iascii,isdraw,issocket,isbinary;
1173 
1174   PetscFunctionBegin;
1175   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1176   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1177   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1178   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1179   if (iascii || isdraw || isbinary || issocket) {
1180     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1181   } else {
1182     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
    MatRelax_MPIAIJ - SOR/SSOR relaxation for a parallel AIJ matrix.

    Only the "local" variants are supported: each outer iteration scatters
    the current solution's ghost values, forms bb1 = bb - B*x (moving the
    off-process coupling to the right-hand side), then runs the requested
    sequential sweep on the diagonal block.  True parallel SOR (coupling
    inside a sweep) is not implemented.
*/
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess the ghost values are zero, so the first
         sweep needs no communication */
      /* NOTE(review): this branch passes lits for both the its and lits
         arguments, while the forward/backward branches pass PETSC_NULL for
         lits -- confirm which is intended */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatPermute_MPIAIJ"
1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1258 {
1259   MPI_Comm       comm,pcomm;
1260   PetscInt       first,local_size,nrows;
1261   const PetscInt *rows;
1262   PetscMPIInt    size;
1263   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1264   PetscErrorCode ierr;
1265 
1266   PetscFunctionBegin;
1267   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1268   /* make a collective version of 'rowp' */
1269   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1270   if (pcomm==comm) {
1271     crowp = rowp;
1272   } else {
1273     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1274     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1275     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1276     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1277   }
1278   /* collect the global row permutation and invert it */
1279   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1280   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1281   if (pcomm!=comm) {
1282     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1283   }
1284   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1285   /* get the local target indices */
1286   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1287   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1288   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1289   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1290   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1291   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1292   /* the column permutation is so much easier;
1293      make a local version of 'colp' and invert it */
1294   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1295   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1296   if (size==1) {
1297     lcolp = colp;
1298   } else {
1299     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1300     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1301     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1302   }
1303   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1304   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1305   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1306   if (size>1) {
1307     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1308     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1309   }
1310   /* now we just get the submatrix */
1311   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1312   /* clean up */
1313   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1314   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #undef __FUNCT__
1319 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1320 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1321 {
1322   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1323   Mat            A = mat->A,B = mat->B;
1324   PetscErrorCode ierr;
1325   PetscReal      isend[5],irecv[5];
1326 
1327   PetscFunctionBegin;
1328   info->block_size     = 1.0;
1329   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1330   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1331   isend[3] = info->memory;  isend[4] = info->mallocs;
1332   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1333   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1334   isend[3] += info->memory;  isend[4] += info->mallocs;
1335   if (flag == MAT_LOCAL) {
1336     info->nz_used      = isend[0];
1337     info->nz_allocated = isend[1];
1338     info->nz_unneeded  = isend[2];
1339     info->memory       = isend[3];
1340     info->mallocs      = isend[4];
1341   } else if (flag == MAT_GLOBAL_MAX) {
1342     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1343     info->nz_used      = irecv[0];
1344     info->nz_allocated = irecv[1];
1345     info->nz_unneeded  = irecv[2];
1346     info->memory       = irecv[3];
1347     info->mallocs      = irecv[4];
1348   } else if (flag == MAT_GLOBAL_SUM) {
1349     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1350     info->nz_used      = irecv[0];
1351     info->nz_allocated = irecv[1];
1352     info->nz_unneeded  = irecv[2];
1353     info->memory       = irecv[3];
1354     info->mallocs      = irecv[4];
1355   }
1356   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1357   info->fill_ratio_needed = 0;
1358   info->factor_mallocs    = 0;
1359 
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 #undef __FUNCT__
1364 #define __FUNCT__ "MatSetOption_MPIAIJ"
1365 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1366 {
1367   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1368   PetscErrorCode ierr;
1369 
1370   PetscFunctionBegin;
1371   switch (op) {
1372   case MAT_NEW_NONZERO_LOCATIONS:
1373   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1374   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1375   case MAT_KEEP_ZEROED_ROWS:
1376   case MAT_NEW_NONZERO_LOCATION_ERR:
1377   case MAT_USE_INODES:
1378   case MAT_IGNORE_ZERO_ENTRIES:
1379     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1380     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1381     break;
1382   case MAT_ROW_ORIENTED:
1383     a->roworiented = flg;
1384     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1385     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1386     break;
1387   case MAT_NEW_DIAGONALS:
1388     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1389     break;
1390   case MAT_IGNORE_OFF_PROC_ENTRIES:
1391     a->donotstash = PETSC_TRUE;
1392     break;
1393   case MAT_SYMMETRIC:
1394     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1395     break;
1396   case MAT_STRUCTURALLY_SYMMETRIC:
1397   case MAT_HERMITIAN:
1398   case MAT_SYMMETRY_ETERNAL:
1399     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1400     break;
1401   default:
1402     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
    MatGetRow_MPIAIJ - Returns one locally owned row of the parallel matrix,
    with global column indices in increasing order.

    The row is assembled by merging three runs: the off-diagonal entries
    whose global column precedes the diagonal block, the diagonal-block
    entries, and the remaining off-diagonal entries.  Output goes into the
    lazily allocated work arrays mat->rowvalues / mat->rowindices, sized for
    the longest local row.  Only one row may be active at a time; the flag
    is cleared by MatRestoreRow_MPIAIJ().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt     max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds the values with the indices packed behind them */
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* request from the blocks only what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark = number of B entries whose global column precedes the diagonal block */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* the split point was already found while copying the values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* diagonal-block columns shifted to global numbering, then the rest of B */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1490 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1491 {
1492   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1493 
1494   PetscFunctionBegin;
1495   if (!aij->getrowactive) {
1496     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1497   }
1498   aij->getrowactive = PETSC_FALSE;
1499   PetscFunctionReturn(0);
1500 }
1501 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   Computes the Frobenius, one, or infinity norm of a parallel AIJ matrix.
   The local diagonal (A) and off-diagonal (B) blocks are processed
   separately and the partial results combined with an MPI reduction.
   The two norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single-process communicator: defer entirely to the sequential routine */
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over all local entries (A block then B block),
         summed across processes, then square-rooted */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per GLOBAL column into tmp, sum across all
         processes into tmp2, then take the maximum over columns */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        /* A-block column indices are local; shift by cstart to get global */
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* B-block columns are compressed; garray maps them back to global */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* every row is fully owned by one process, so take the local maximum
         row sum (A part plus B part) and reduce with MPI_MAX */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1578 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   Forms the transpose of a parallel AIJ matrix by inserting each local row
   of A as a column of the result via MatSetValues().  For
   MAT_INITIAL_MATRIX (or in-place reuse) a new matrix is created with a
   preallocation estimated from the diagonal block only.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    /* transpose has swapped row/column sizes relative to A */
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part: row i of A becomes column (rstart+i) of B.
     NOTE: Aloc->j was shifted to global indices above and is shifted back below */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part, translating compressed column indices to global via garray */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place transpose: move B's guts into A and discard the shell */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1649 
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1652 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1653 {
1654   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1655   Mat            a = aij->A,b = aij->B;
1656   PetscErrorCode ierr;
1657   PetscInt       s1,s2,s3;
1658 
1659   PetscFunctionBegin;
1660   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1661   if (rr) {
1662     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1663     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1664     /* Overlap communication with computation. */
1665     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1666   }
1667   if (ll) {
1668     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1669     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1670     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1671   }
1672   /* scale  the diagonal block */
1673   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1674 
1675   if (rr) {
1676     /* Do a scatter end and then right scale the off-diagonal block */
1677     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1679   }
1680 
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1686 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1687 {
1688   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1689   PetscErrorCode ierr;
1690 
1691   PetscFunctionBegin;
1692   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1693   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1694   PetscFunctionReturn(0);
1695 }
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1698 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1699 {
1700   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1701   PetscErrorCode ierr;
1702 
1703   PetscFunctionBegin;
1704   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatEqual_MPIAIJ"
1710 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1711 {
1712   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1713   Mat            a,b,c,d;
1714   PetscTruth     flg;
1715   PetscErrorCode ierr;
1716 
1717   PetscFunctionBegin;
1718   a = matA->A; b = matA->B;
1719   c = matB->A; d = matB->B;
1720 
1721   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1722   if (flg) {
1723     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1724   }
1725   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatCopy_MPIAIJ"
1731 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1732 {
1733   PetscErrorCode ierr;
1734   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1735   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1736 
1737   PetscFunctionBegin;
1738   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1739   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1740     /* because of the column compression in the off-processor part of the matrix a->B,
1741        the number of columns in a->B and b->B may be different, hence we cannot call
1742        the MatCopy() directly on the two parts. If need be, we can provide a more
1743        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1744        then copying the submatrices */
1745     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1746   } else {
1747     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1748     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1749   }
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1755 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1756 {
1757   PetscErrorCode ierr;
1758 
1759   PetscFunctionBegin;
1760   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   Computes Y = a*X + Y.  Three paths:
     SAME_NONZERO_PATTERN   - direct BLAS axpy on the value arrays of both blocks
     SUBSET_NONZERO_PATTERN - maps X's nonzeros into Y via a cached xtoy index map
     otherwise              - generic MatAXPY_Basic()
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* identical sparsity: the value arrays line up entry-for-entry, so a
       single BLAS axpy per block suffices */
    PetscScalar alpha = a;
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    /* X's pattern is contained in Y's; the diagonal block has its own routine */
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* discard a cached xtoy map that was built against a different X matrix */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy: position of each X nonzero inside Y's arrays */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      /* keep X's B block alive while the cached map refers to it */
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1806 
1807 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatConjugate_MPIAIJ"
1811 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1812 {
1813 #if defined(PETSC_USE_COMPLEX)
1814   PetscErrorCode ierr;
1815   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1816 
1817   PetscFunctionBegin;
1818   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1819   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1820 #else
1821   PetscFunctionBegin;
1822 #endif
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 #undef __FUNCT__
1827 #define __FUNCT__ "MatRealPart_MPIAIJ"
1828 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1829 {
1830   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1831   PetscErrorCode ierr;
1832 
1833   PetscFunctionBegin;
1834   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1835   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1836   PetscFunctionReturn(0);
1837 }
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1841 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1842 {
1843   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1844   PetscErrorCode ierr;
1845 
1846   PetscFunctionBegin;
1847   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1848   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 #ifdef PETSC_HAVE_PBGL
1853 
1854 #include <boost/parallel/mpi/bsp_process_group.hpp>
1855 #include <boost/graph/distributed/ilu_default_graph.hpp>
1856 #include <boost/graph/distributed/ilu_0_block.hpp>
1857 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1858 #include <boost/graph/distributed/petsc/interface.hpp>
1859 #include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>
1861 
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1864 /*
1865   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1866 */
1867 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1868 {
1869   namespace petsc = boost::distributed::petsc;
1870 
1871   namespace graph_dist = boost::graph::distributed;
1872   using boost::graph::distributed::ilu_default::process_group_type;
1873   using boost::graph::ilu_permuted;
1874 
1875   PetscTruth      row_identity, col_identity;
1876   PetscContainer  c;
1877   PetscInt        m, n, M, N;
1878   PetscErrorCode  ierr;
1879 
1880   PetscFunctionBegin;
1881   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1882   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1883   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1884   if (!row_identity || !col_identity) {
1885     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1886   }
1887 
1888   process_group_type pg;
1889   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1890   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1891   lgraph_type&   level_graph = *lgraph_p;
1892   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1893 
1894   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1895   ilu_permuted(level_graph);
1896 
1897   /* put together the new matrix */
1898   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1899   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1900   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1901   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1902   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1903   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1904   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1905 
1906   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1907   ierr = PetscContainerSetPointer(c, lgraph_p);
1908   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
1909   PetscFunctionReturn(0);
1910 }
1911 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
/*
   Numeric factorization stage for the PBGL parallel ILU: intentionally a
   no-op, since MatILUFactorSymbolic_MPIAIJ() already performs the actual
   factorization (ilu_permuted) when it builds the level graph.
*/
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatSolve_MPIAIJ"
1922 /*
1923   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1924 */
1925 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1926 {
1927   namespace graph_dist = boost::graph::distributed;
1928 
1929   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1930   lgraph_type*   lgraph_p;
1931   PetscContainer c;
1932   PetscErrorCode ierr;
1933 
1934   PetscFunctionBegin;
1935   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1936   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1937   ierr = VecCopy(b, x);CHKERRQ(ierr);
1938 
1939   PetscScalar* array_x;
1940   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1941   PetscInt sx;
1942   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1943 
1944   PetscScalar* array_b;
1945   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1946   PetscInt sb;
1947   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1948 
1949   lgraph_type&   level_graph = *lgraph_p;
1950   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1951 
1952   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1953   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1954                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1955 
1956   typedef boost::iterator_property_map<array_ref_type::iterator,
1957                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1958   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1959                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1960 
1961   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1962 
1963   PetscFunctionReturn(0);
1964 }
1965 #endif
1966 
typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;      /* local nonzero count; number of messages sent/received */
  PetscMPIInt    *send_rank;                 /* destination ranks; receive ranks live at send_rank+size in the same allocation */
  PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;  /* send sizes, sent column/row-ptr data, per-message received column/row-ptr data */
  PetscScalar    *sbuf_a,**rbuf_a;           /* sent values, per-message received values */
  PetscErrorCode (*MatDestroy)(Mat);         /* original destroy routine, restored by MatDestroy_MatRedundant() */
} Mat_Redundant;
1974 
1975 #undef __FUNCT__
1976 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1977 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1978 {
1979   PetscErrorCode       ierr;
1980   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1981   PetscInt             i;
1982 
1983   PetscFunctionBegin;
1984   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1985   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1986   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1987   for (i=0; i<redund->nrecvs; i++){
1988     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1989     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1990   }
1991   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1992   ierr = PetscFree(redund);CHKERRQ(ierr);
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 #undef __FUNCT__
1997 #define __FUNCT__ "MatDestroy_MatRedundant"
1998 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1999 {
2000   PetscErrorCode  ierr;
2001   PetscContainer  container;
2002   Mat_Redundant   *redund=PETSC_NULL;
2003 
2004   PetscFunctionBegin;
2005   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2006   if (container) {
2007     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2008   } else {
2009     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2010   }
2011   A->ops->destroy = redund->MatDestroy;
2012   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2013   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2014   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2015   PetscFunctionReturn(0);
2016 }
2017 
2018 #undef __FUNCT__
2019 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2020 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2021 {
2022   PetscMPIInt    rank,size;
2023   MPI_Comm       comm=((PetscObject)mat)->comm;
2024   PetscErrorCode ierr;
2025   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2026   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2027   PetscInt       *rowrange=mat->rmap->range;
2028   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2029   Mat            A=aij->A,B=aij->B,C=*matredundant;
2030   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2031   PetscScalar    *sbuf_a;
2032   PetscInt       nzlocal=a->nz+b->nz;
2033   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2034   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2035   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2036   MatScalar      *aworkA,*aworkB;
2037   PetscScalar    *vals;
2038   PetscMPIInt    tag1,tag2,tag3,imdex;
2039   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2040                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2041   MPI_Status     recv_status,*send_status;
2042   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2043   PetscInt       **rbuf_j=PETSC_NULL;
2044   PetscScalar    **rbuf_a=PETSC_NULL;
2045   Mat_Redundant  *redund=PETSC_NULL;
2046   PetscContainer container;
2047 
2048   PetscFunctionBegin;
2049   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2050   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2051 
2052   if (reuse == MAT_REUSE_MATRIX) {
2053     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2054     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2055     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2056     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2057     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2058     if (container) {
2059       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2060     } else {
2061       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2062     }
2063     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2064 
2065     nsends    = redund->nsends;
2066     nrecvs    = redund->nrecvs;
2067     send_rank = redund->send_rank; recv_rank = send_rank + size;
2068     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2069     sbuf_j    = redund->sbuf_j;
2070     sbuf_a    = redund->sbuf_a;
2071     rbuf_j    = redund->rbuf_j;
2072     rbuf_a    = redund->rbuf_a;
2073   }
2074 
2075   if (reuse == MAT_INITIAL_MATRIX){
2076     PetscMPIInt  subrank,subsize;
2077     PetscInt     nleftover,np_subcomm;
2078     /* get the destination processors' id send_rank, nsends and nrecvs */
2079     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2080     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2081     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
2082     recv_rank = send_rank + size;
2083     np_subcomm = size/nsubcomm;
2084     nleftover  = size - nsubcomm*np_subcomm;
2085     nsends = 0; nrecvs = 0;
2086     for (i=0; i<size; i++){ /* i=rank*/
2087       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2088         send_rank[nsends] = i; nsends++;
2089         recv_rank[nrecvs++] = i;
2090       }
2091     }
2092     if (rank >= size - nleftover){/* this proc is a leftover processor */
2093       i = size-nleftover-1;
2094       j = 0;
2095       while (j < nsubcomm - nleftover){
2096         send_rank[nsends++] = i;
2097         i--; j++;
2098       }
2099     }
2100 
2101     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2102       for (i=0; i<nleftover; i++){
2103         recv_rank[nrecvs++] = size-nleftover+i;
2104       }
2105     }
2106 
2107     /* allocate sbuf_j, sbuf_a */
2108     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2109     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2110     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2111   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2112 
2113   /* copy mat's local entries into the buffers */
2114   if (reuse == MAT_INITIAL_MATRIX){
2115     rownz_max = 0;
2116     rptr = sbuf_j;
2117     cols = sbuf_j + rend-rstart + 1;
2118     vals = sbuf_a;
2119     rptr[0] = 0;
2120     for (i=0; i<rend-rstart; i++){
2121       row = i + rstart;
2122       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2123       ncols  = nzA + nzB;
2124       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2125       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2126       /* load the column indices for this row into cols */
2127       lwrite = 0;
2128       for (l=0; l<nzB; l++) {
2129         if ((ctmp = bmap[cworkB[l]]) < cstart){
2130           vals[lwrite]   = aworkB[l];
2131           cols[lwrite++] = ctmp;
2132         }
2133       }
2134       for (l=0; l<nzA; l++){
2135         vals[lwrite]   = aworkA[l];
2136         cols[lwrite++] = cstart + cworkA[l];
2137       }
2138       for (l=0; l<nzB; l++) {
2139         if ((ctmp = bmap[cworkB[l]]) >= cend){
2140           vals[lwrite]   = aworkB[l];
2141           cols[lwrite++] = ctmp;
2142         }
2143       }
2144       vals += ncols;
2145       cols += ncols;
2146       rptr[i+1] = rptr[i] + ncols;
2147       if (rownz_max < ncols) rownz_max = ncols;
2148     }
2149     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2150   } else { /* only copy matrix values into sbuf_a */
2151     rptr = sbuf_j;
2152     vals = sbuf_a;
2153     rptr[0] = 0;
2154     for (i=0; i<rend-rstart; i++){
2155       row = i + rstart;
2156       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2157       ncols  = nzA + nzB;
2158       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2159       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2160       lwrite = 0;
2161       for (l=0; l<nzB; l++) {
2162         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2163       }
2164       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2165       for (l=0; l<nzB; l++) {
2166         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2167       }
2168       vals += ncols;
2169       rptr[i+1] = rptr[i] + ncols;
2170     }
2171   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2172 
2173   /* send nzlocal to others, and recv other's nzlocal */
2174   /*--------------------------------------------------*/
2175   if (reuse == MAT_INITIAL_MATRIX){
2176     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2177     s_waits2 = s_waits3 + nsends;
2178     s_waits1 = s_waits2 + nsends;
2179     r_waits1 = s_waits1 + nsends;
2180     r_waits2 = r_waits1 + nrecvs;
2181     r_waits3 = r_waits2 + nrecvs;
2182   } else {
2183     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2184     r_waits3 = s_waits3 + nsends;
2185   }
2186 
2187   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2188   if (reuse == MAT_INITIAL_MATRIX){
2189     /* get new tags to keep the communication clean */
2190     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2191     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2192     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2193     rbuf_nz = sbuf_nz + nsends;
2194 
2195     /* post receives of other's nzlocal */
2196     for (i=0; i<nrecvs; i++){
2197       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2198     }
2199     /* send nzlocal to others */
2200     for (i=0; i<nsends; i++){
2201       sbuf_nz[i] = nzlocal;
2202       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2203     }
2204     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2205     count = nrecvs;
2206     while (count) {
2207       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2208       recv_rank[imdex] = recv_status.MPI_SOURCE;
2209       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2210       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2211 
2212       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2213       rbuf_nz[imdex] += i + 2;
2214       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2215       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2216       count--;
2217     }
2218     /* wait on sends of nzlocal */
2219     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2220     /* send mat->i,j to others, and recv from other's */
2221     /*------------------------------------------------*/
2222     for (i=0; i<nsends; i++){
2223       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2224       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2225     }
2226     /* wait on receives of mat->i,j */
2227     /*------------------------------*/
2228     count = nrecvs;
2229     while (count) {
2230       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2231       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2232       count--;
2233     }
2234     /* wait on sends of mat->i,j */
2235     /*---------------------------*/
2236     if (nsends) {
2237       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2238     }
2239   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2240 
2241   /* post receives, send and receive mat->a */
2242   /*----------------------------------------*/
2243   for (imdex=0; imdex<nrecvs; imdex++) {
2244     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2245   }
2246   for (i=0; i<nsends; i++){
2247     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2248   }
2249   count = nrecvs;
2250   while (count) {
2251     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2252     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2253     count--;
2254   }
2255   if (nsends) {
2256     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2257   }
2258 
2259   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2260 
2261   /* create redundant matrix */
2262   /*-------------------------*/
2263   if (reuse == MAT_INITIAL_MATRIX){
2264     /* compute rownz_max for preallocation */
2265     for (imdex=0; imdex<nrecvs; imdex++){
2266       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2267       rptr = rbuf_j[imdex];
2268       for (i=0; i<j; i++){
2269         ncols = rptr[i+1] - rptr[i];
2270         if (rownz_max < ncols) rownz_max = ncols;
2271       }
2272     }
2273 
2274     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2275     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2276     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2277     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2278     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2279   } else {
2280     C = *matredundant;
2281   }
2282 
2283   /* insert local matrix entries */
2284   rptr = sbuf_j;
2285   cols = sbuf_j + rend-rstart + 1;
2286   vals = sbuf_a;
2287   for (i=0; i<rend-rstart; i++){
2288     row   = i + rstart;
2289     ncols = rptr[i+1] - rptr[i];
2290     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2291     vals += ncols;
2292     cols += ncols;
2293   }
2294   /* insert received matrix entries */
2295   for (imdex=0; imdex<nrecvs; imdex++){
2296     rstart = rowrange[recv_rank[imdex]];
2297     rend   = rowrange[recv_rank[imdex]+1];
2298     rptr = rbuf_j[imdex];
2299     cols = rbuf_j[imdex] + rend-rstart + 1;
2300     vals = rbuf_a[imdex];
2301     for (i=0; i<rend-rstart; i++){
2302       row   = i + rstart;
2303       ncols = rptr[i+1] - rptr[i];
2304       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2305       vals += ncols;
2306       cols += ncols;
2307     }
2308   }
2309   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2310   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2311   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2312   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2313   if (reuse == MAT_INITIAL_MATRIX){
2314     PetscContainer container;
2315     *matredundant = C;
2316     /* create a supporting struct and attach it to C for reuse */
2317     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2318     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2319     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2320     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2321     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2322 
2323     redund->nzlocal = nzlocal;
2324     redund->nsends  = nsends;
2325     redund->nrecvs  = nrecvs;
2326     redund->send_rank = send_rank;
2327     redund->sbuf_nz = sbuf_nz;
2328     redund->sbuf_j  = sbuf_j;
2329     redund->sbuf_a  = sbuf_a;
2330     redund->rbuf_j  = rbuf_j;
2331     redund->rbuf_a  = rbuf_a;
2332 
2333     redund->MatDestroy = C->ops->destroy;
2334     C->ops->destroy    = MatDestroy_MatRedundant;
2335   }
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2341 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2342 {
2343   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2344   PetscErrorCode ierr;
2345   PetscInt       i,*idxb = 0;
2346   PetscScalar    *va,*vb;
2347   Vec            vtmp;
2348 
2349   PetscFunctionBegin;
2350   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2351   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2352   if (idx) {
2353     for (i=0; i<A->rmap->n; i++) {
2354       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2355     }
2356   }
2357 
2358   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2359   if (idx) {
2360     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2361   }
2362   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2363   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2364 
2365   for (i=0; i<A->rmap->n; i++){
2366     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2367       va[i] = vb[i];
2368       if (idx) idx[i] = a->garray[idxb[i]];
2369     }
2370   }
2371 
2372   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2373   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2374   if (idxb) {
2375     ierr = PetscFree(idxb);CHKERRQ(ierr);
2376   }
2377   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 #undef __FUNCT__
2382 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2383 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2384 {
2385   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2386   PetscErrorCode ierr;
2387   PetscInt       i,*idxb = 0;
2388   PetscScalar    *va,*vb;
2389   Vec            vtmp;
2390 
2391   PetscFunctionBegin;
2392   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2393   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2394   if (idx) {
2395     for (i=0; i<A->cmap->n; i++) {
2396       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2397     }
2398   }
2399 
2400   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2401   if (idx) {
2402     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2403   }
2404   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2405   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2406 
2407   for (i=0; i<A->rmap->n; i++){
2408     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2409       va[i] = vb[i];
2410       if (idx) idx[i] = a->garray[idxb[i]];
2411     }
2412   }
2413 
2414   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2415   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2416   if (idxb) {
2417     ierr = PetscFree(idxb);CHKERRQ(ierr);
2418   }
2419   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 #undef __FUNCT__
2424 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2425 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2426 {
2427   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2428   PetscInt       n      = A->rmap->n;
2429   PetscInt       cstart = A->cmap->rstart;
2430   PetscInt      *cmap   = mat->garray;
2431   PetscInt      *diagIdx, *offdiagIdx;
2432   Vec            diagV, offdiagV;
2433   PetscScalar   *a, *diagA, *offdiagA;
2434   PetscInt       r;
2435   PetscErrorCode ierr;
2436 
2437   PetscFunctionBegin;
2438   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2439   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2440   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2441   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2442   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2443   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2444   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2445   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2446   for(r = 0; r < n; ++r) {
2447     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2448       a[r]   = diagA[r];
2449       idx[r] = cstart + diagIdx[r];
2450     } else {
2451       a[r]   = offdiagA[r];
2452       idx[r] = cmap[offdiagIdx[r]];
2453     }
2454   }
2455   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2456   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2457   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2458   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2459   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2460   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2466 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2467 {
2468   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2469   PetscInt       n      = A->rmap->n;
2470   PetscInt       cstart = A->cmap->rstart;
2471   PetscInt      *cmap   = mat->garray;
2472   PetscInt      *diagIdx, *offdiagIdx;
2473   Vec            diagV, offdiagV;
2474   PetscScalar   *a, *diagA, *offdiagA;
2475   PetscInt       r;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2480   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2481   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2482   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2483   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2484   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2485   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2486   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2487   for(r = 0; r < n; ++r) {
2488     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2489       a[r]   = diagA[r];
2490       idx[r] = cstart + diagIdx[r];
2491     } else {
2492       a[r]   = offdiagA[r];
2493       idx[r] = cmap[offdiagIdx[r]];
2494     }
2495   }
2496   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2497   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2498   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2499   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2500   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2501   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 #undef __FUNCT__
2506 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
/*
   Returns, on every process, a sequential matrix containing the nonzero
   structure (but not the values) of the entire parallel matrix.
*/
PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Gather the full structure everywhere; MAT_DO_NOT_GET_VALUES skips the
     numerical values, so only the sparsity pattern is communicated. */
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2515 
2516 /* -------------------------------------------------------------------*/
/*
   Function (dispatch) table for the MATMPIAIJ type.  Each entry occupies a
   fixed slot in struct _MatOps (the numbered comments give the slot index);
   a 0 means that operation is not implemented for this matrix type.
   The order is significant -- never reorder entries.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*84*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*90*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*95*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*100*/0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*105*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*110*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*115*/MatGetSeqNonzerostructure_MPIAIJ};
2649 
2650 /* ----------------------------------------------------------------------------------------*/
2651 
2652 EXTERN_C_BEGIN
2653 #undef __FUNCT__
2654 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2655 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2656 {
2657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2658   PetscErrorCode ierr;
2659 
2660   PetscFunctionBegin;
2661   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2662   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2663   PetscFunctionReturn(0);
2664 }
2665 EXTERN_C_END
2666 
2667 EXTERN_C_BEGIN
2668 #undef __FUNCT__
2669 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2670 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2671 {
2672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2673   PetscErrorCode ierr;
2674 
2675   PetscFunctionBegin;
2676   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2677   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2678   PetscFunctionReturn(0);
2679 }
2680 EXTERN_C_END
2681 
2682 #include "petscpc.h"
2683 EXTERN_C_BEGIN
2684 #undef __FUNCT__
2685 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2686 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2687 {
2688   Mat_MPIAIJ     *b;
2689   PetscErrorCode ierr;
2690   PetscInt       i;
2691 
2692   PetscFunctionBegin;
2693   B->preallocated = PETSC_TRUE;
2694   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2695   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2696   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2697   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2698 
2699   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2700   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2701   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2702   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2703   if (d_nnz) {
2704     for (i=0; i<B->rmap->n; i++) {
2705       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2706     }
2707   }
2708   if (o_nnz) {
2709     for (i=0; i<B->rmap->n; i++) {
2710       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2711     }
2712   }
2713   b = (Mat_MPIAIJ*)B->data;
2714 
2715   /* Explicitly create 2 MATSEQAIJ matrices. */
2716   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2717   ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2718   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2719   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2720   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2721   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2722   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2723   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2724 
2725   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2726   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2727 
2728   PetscFunctionReturn(0);
2729 }
2730 EXTERN_C_END
2731 
2732 #undef __FUNCT__
2733 #define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Duplicates an MPIAIJ matrix: creates a new matrix with the same layout and
   communication machinery, copying the numerical values if cpvalues requests it.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* Copy the full operations table so any per-instance overrides survive */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  /* Copy top-level Mat state; the duplicate is born already assembled */
  mat->factor       = matin->factor;
  mat->rmap->bs      = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  /* Copy the MPIAIJ-specific scalar state; per-row work buffers used by
     MatGetRow() are left unallocated and created on demand */
  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  a->donotstash     = oldmat->donotstash;
  a->roworiented    = oldmat->roworiented;
  a->rowindices     = 0;
  a->rowvalues      = 0;
  a->getrowactive   = PETSC_FALSE;

  ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
  /* colmap: global column -> local off-diagonal column lookup (table or array
     representation depending on the build configuration) */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* garray: local off-diagonal column -> global column map */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* Duplicate the communication machinery (local work vector and scatter)
     and the two sequential blocks */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2795 
2796 #include "petscsys.h"
2797 
2798 #undef __FUNCT__
2799 #define __FUNCT__ "MatLoad_MPIAIJ"
2800 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2801 {
2802   Mat            A;
2803   PetscScalar    *vals,*svals;
2804   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2805   MPI_Status     status;
2806   PetscErrorCode ierr;
2807   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2808   PetscInt       i,nz,j,rstart,rend,mmax;
2809   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2810   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2811   PetscInt       cend,cstart,n,*rowners;
2812   int            fd;
2813 
2814   PetscFunctionBegin;
2815   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2816   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2817   if (!rank) {
2818     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2819     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2820     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2821   }
2822 
2823   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2824   M = header[1]; N = header[2];
2825   /* determine ownership of all rows */
2826   m    = M/size + ((M % size) > rank);
2827   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2828   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2829 
2830   /* First process needs enough room for process with most rows */
2831   if (!rank) {
2832     mmax       = rowners[1];
2833     for (i=2; i<size; i++) {
2834       mmax = PetscMax(mmax,rowners[i]);
2835     }
2836   } else mmax = m;
2837 
2838   rowners[0] = 0;
2839   for (i=2; i<=size; i++) {
2840     rowners[i] += rowners[i-1];
2841   }
2842   rstart = rowners[rank];
2843   rend   = rowners[rank+1];
2844 
2845   /* distribute row lengths to all processors */
2846   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2847   if (!rank) {
2848     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2849     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2850     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2851     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2852     for (j=0; j<m; j++) {
2853       procsnz[0] += ourlens[j];
2854     }
2855     for (i=1; i<size; i++) {
2856       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2857       /* calculate the number of nonzeros on each processor */
2858       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2859         procsnz[i] += rowlengths[j];
2860       }
2861       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2862     }
2863     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2864   } else {
2865     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2866   }
2867 
2868   if (!rank) {
2869     /* determine max buffer needed and allocate it */
2870     maxnz = 0;
2871     for (i=0; i<size; i++) {
2872       maxnz = PetscMax(maxnz,procsnz[i]);
2873     }
2874     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2875 
2876     /* read in my part of the matrix column indices  */
2877     nz   = procsnz[0];
2878     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2879     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2880 
2881     /* read in every one elses and ship off */
2882     for (i=1; i<size; i++) {
2883       nz   = procsnz[i];
2884       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2885       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2886     }
2887     ierr = PetscFree(cols);CHKERRQ(ierr);
2888   } else {
2889     /* determine buffer space needed for message */
2890     nz = 0;
2891     for (i=0; i<m; i++) {
2892       nz += ourlens[i];
2893     }
2894     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2895 
2896     /* receive message of column indices*/
2897     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2898     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2899     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2900   }
2901 
2902   /* determine column ownership if matrix is not square */
2903   if (N != M) {
2904     n      = N/size + ((N % size) > rank);
2905     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2906     cstart = cend - n;
2907   } else {
2908     cstart = rstart;
2909     cend   = rend;
2910     n      = cend - cstart;
2911   }
2912 
2913   /* loop over local rows, determining number of off diagonal entries */
2914   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2915   jj = 0;
2916   for (i=0; i<m; i++) {
2917     for (j=0; j<ourlens[i]; j++) {
2918       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2919       jj++;
2920     }
2921   }
2922 
2923   /* create our matrix */
2924   for (i=0; i<m; i++) {
2925     ourlens[i] -= offlens[i];
2926   }
2927   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2928   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2929   ierr = MatSetType(A,type);CHKERRQ(ierr);
2930   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2931 
2932   for (i=0; i<m; i++) {
2933     ourlens[i] += offlens[i];
2934   }
2935 
2936   if (!rank) {
2937     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2938 
2939     /* read in my part of the matrix numerical values  */
2940     nz   = procsnz[0];
2941     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2942 
2943     /* insert into matrix */
2944     jj      = rstart;
2945     smycols = mycols;
2946     svals   = vals;
2947     for (i=0; i<m; i++) {
2948       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2949       smycols += ourlens[i];
2950       svals   += ourlens[i];
2951       jj++;
2952     }
2953 
2954     /* read in other processors and ship out */
2955     for (i=1; i<size; i++) {
2956       nz   = procsnz[i];
2957       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2958       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2959     }
2960     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2961   } else {
2962     /* receive numeric values */
2963     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2964 
2965     /* receive message of values*/
2966     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2967     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2968     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2969 
2970     /* insert into matrix */
2971     jj      = rstart;
2972     smycols = mycols;
2973     svals   = vals;
2974     for (i=0; i<m; i++) {
2975       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2976       smycols += ourlens[i];
2977       svals   += ourlens[i];
2978       jj++;
2979     }
2980   }
2981   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2982   ierr = PetscFree(vals);CHKERRQ(ierr);
2983   ierr = PetscFree(mycols);CHKERRQ(ierr);
2984   ierr = PetscFree(rowners);CHKERRQ(ierr);
2985 
2986   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2987   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2988   *newmat = A;
2989   PetscFunctionReturn(0);
2990 }
2991 
2992 #undef __FUNCT__
2993 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  submatrix on each process, and then the final result by concatenating the
  local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Phase 1: gather the requested rows/columns into a local SeqAIJ matrix.
     On reuse, the previously gathered submatrix is recovered from the
     "SubMatrix" attachment composed on *newmat by the initial call. */
  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
	nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* Phase 2: copy the gathered SeqAIJ rows into the parallel result,
     walking the CSR arrays (i/j/a) of Mreuse row by row */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    /* composing took a reference; drop ours so M owns Mreuse */
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
3118 
3119 EXTERN_C_BEGIN
3120 #undef __FUNCT__
3121 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3122 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3123 {
3124   PetscInt       m,cstart, cend,j,nnz,i,d;
3125   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3126   const PetscInt *JJ;
3127   PetscScalar    *values;
3128   PetscErrorCode ierr;
3129 
3130   PetscFunctionBegin;
3131   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3132 
3133   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3134   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3135   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3136   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3137   m      = B->rmap->n;
3138   cstart = B->cmap->rstart;
3139   cend   = B->cmap->rend;
3140   rstart = B->rmap->rstart;
3141 
3142   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3143   o_nnz = d_nnz + m;
3144 
3145 #if defined(PETSC_USE_DEBUGGING)
3146   for (i=0; i<m; i++) {
3147     nnz     = Ii[i+1]- Ii[i];
3148     JJ      = J + Ii[i];
3149     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3150     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3151     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3152     for (j=1; j<nnz; j++) {
3153       if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3154     }
3155   }
3156 #endif
3157 
3158   for (i=0; i<m; i++) {
3159     nnz     = Ii[i+1]- Ii[i];
3160     JJ      = J + Ii[i];
3161     nnz_max = PetscMax(nnz_max,nnz);
3162     for (j=0; j<nnz; j++) {
3163       if (*JJ >= cstart) break;
3164       JJ++;
3165     }
3166     d = 0;
3167     for (; j<nnz; j++) {
3168       if (*JJ++ >= cend) break;
3169       d++;
3170     }
3171     d_nnz[i] = d;
3172     o_nnz[i] = nnz - d;
3173   }
3174   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3175   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3176 
3177   if (v) values = (PetscScalar*)v;
3178   else {
3179     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3180     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3181   }
3182 
3183   for (i=0; i<m; i++) {
3184     ii   = i + rstart;
3185     nnz  = Ii[i+1]- Ii[i];
3186     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3187   }
3188   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3189   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3190 
3191   if (!v) {
3192     ierr = PetscFree(values);CHKERRQ(ierr);
3193   }
3194   PetscFunctionReturn(0);
3195 }
3196 EXTERN_C_END
3197 
3198 #undef __FUNCT__
3199 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3200 /*@
3201    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3202    (the default parallel PETSc format).
3203 
3204    Collective on MPI_Comm
3205 
3206    Input Parameters:
3207 +  B - the matrix
3208 .  i - the indices into j for the start of each local row (starts with zero)
3209 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3210 -  v - optional values in the matrix
3211 
3212    Level: developer
3213 
3214    Notes:
3215        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3216      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3217      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3218 
3219        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3220 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:
3224 
3225         1 0 0
3226         2 0 3     P0
3227        -------
3228         4 5 6     P1
3229 
     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3239 
3240       The column indices for each row MUST be sorted.
3241 
3242 .keywords: matrix, aij, compressed row, sparse, parallel
3243 
3244 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3245           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3246 @*/
3247 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3248 {
3249   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3250 
3251   PetscFunctionBegin;
3252   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3253   if (f) {
3254     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3255   }
3256   PetscFunctionReturn(0);
3257 }
3258 
3259 #undef __FUNCT__
3260 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3261 /*@C
3262    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3263    (the default parallel PETSc format).  For good matrix assembly performance
3264    the user should preallocate the matrix storage by setting the parameters
3265    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3266    performance can be increased by more than a factor of 50.
3267 
3268    Collective on MPI_Comm
3269 
3270    Input Parameters:
3271 +  A - the matrix
3272 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3273            (same value is used for all local rows)
3274 .  d_nnz - array containing the number of nonzeros in the various rows of the
3275            DIAGONAL portion of the local submatrix (possibly different for each row)
3276            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3277            The size of this array is equal to the number of local rows, i.e 'm'.
3278            You must leave room for the diagonal entry even if it is zero.
3279 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3280            submatrix (same value is used for all local rows).
3281 -  o_nnz - array containing the number of nonzeros in the various rows of the
3282            OFF-DIAGONAL portion of the local submatrix (possibly different for
3283            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3284            structure. The size of this array is equal to the number
3285            of local rows, i.e 'm'.
3286 
3287    If the *_nnz parameter is given then the *_nz parameter is ignored
3288 
3289    The AIJ format (also called the Yale sparse matrix format or
3290    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3291    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3292 
3293    The parallel matrix is partitioned such that the first m0 rows belong to
3294    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3295    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3296 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3303 
3304    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3305 
3306    You can call MatGetInfo() to get information on how effective the preallocation was;
3307    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3308    You can also run with the option -info and look for messages with the string
3309    malloc in them to see if additional memory allocation was needed.
3310 
3311    Example usage:
3312 
3313    Consider the following 8x8 matrix with 34 non-zero values, that is
3314    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3315    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3316    as follows:
3317 
3318 .vb
3319             1  2  0  |  0  3  0  |  0  4
3320     Proc0   0  5  6  |  7  0  0  |  8  0
3321             9  0 10  | 11  0  0  | 12  0
3322     -------------------------------------
3323            13  0 14  | 15 16 17  |  0  0
3324     Proc1   0 18  0  | 19 20 21  |  0  0
3325             0  0  0  | 22 23  0  | 24  0
3326     -------------------------------------
3327     Proc2  25 26 27  |  0  0 28  | 29  0
3328            30  0  0  | 31 32 33  |  0 34
3329 .ve
3330 
3331    This can be represented as a collection of submatrices as:
3332 
3333 .vb
3334       A B C
3335       D E F
3336       G H I
3337 .ve
3338 
3339    Where the submatrices A,B,C are owned by proc0, D,E,F are
3340    owned by proc1, G,H,I are owned by proc2.
3341 
3342    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3343    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3344    The 'M','N' parameters are 8,8, and have the same values on all procs.
3345 
3346    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3347    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3348    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3349    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3350    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3352 
3353    When d_nz, o_nz parameters are specified, d_nz storage elements are
3354    allocated for every row of the local diagonal submatrix, and o_nz
3355    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
3357    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3358    In this case, the values of d_nz,o_nz are:
3359 .vb
3360      proc0 : dnz = 2, o_nz = 2
3361      proc1 : dnz = 3, o_nz = 2
3362      proc2 : dnz = 1, o_nz = 4
3363 .ve
3364    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3365    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
3367    34 values.
3368 
3369    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3371    In the above case the values for d_nnz,o_nnz are:
3372 .vb
3373      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3374      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3375      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3376 .ve
3377    Here the space allocated is sum of all the above values i.e 34, and
3378    hence pre-allocation is perfect.
3379 
3380    Level: intermediate
3381 
3382 .keywords: matrix, aij, compressed row, sparse, parallel
3383 
3384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3385           MPIAIJ, MatGetInfo()
3386 @*/
3387 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3388 {
3389   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3390 
3391   PetscFunctionBegin;
3392   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3393   if (f) {
3394     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3395   }
3396   PetscFunctionReturn(0);
3397 }
3398 
3399 #undef __FUNCT__
3400 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3401 /*@
3402      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3403          CSR format the local rows.
3404 
3405    Collective on MPI_Comm
3406 
3407    Input Parameters:
3408 +  comm - MPI communicator
3409 .  m - number of local rows (Cannot be PETSC_DECIDE)
3410 .  n - This value should be the same as the local size used in creating the
3411        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3412        calculated if N is given) For square matrices n is almost always m.
3413 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3414 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3415 .   i - row indices
3416 .   j - column indices
3417 -   a - matrix values
3418 
3419    Output Parameter:
3420 .   mat - the matrix
3421 
3422    Level: intermediate
3423 
3424    Notes:
3425        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3426      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3427      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3428 
3429        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3430 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:
3434 
3435         1 0 0
3436         2 0 3     P0
3437        -------
3438         4 5 6     P1
3439 
     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3449 
3450 .keywords: matrix, aij, compressed row, sparse, parallel
3451 
3452 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3453           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3454 @*/
3455 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3456 {
3457   PetscErrorCode ierr;
3458 
3459  PetscFunctionBegin;
3460   if (i[0]) {
3461     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3462   }
3463   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3464   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3465   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3466   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3467   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3468   PetscFunctionReturn(0);
3469 }
3470 
3471 #undef __FUNCT__
3472 #define __FUNCT__ "MatCreateMPIAIJ"
3473 /*@C
3474    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3475    (the default parallel PETSc format).  For good matrix assembly performance
3476    the user should preallocate the matrix storage by setting the parameters
3477    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3478    performance can be increased by more than a factor of 50.
3479 
3480    Collective on MPI_Comm
3481 
3482    Input Parameters:
3483 +  comm - MPI communicator
3484 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3485            This value should be the same as the local size used in creating the
3486            y vector for the matrix-vector product y = Ax.
3487 .  n - This value should be the same as the local size used in creating the
3488        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3489        calculated if N is given) For square matrices n is almost always m.
3490 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3491 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3492 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3493            (same value is used for all local rows)
3494 .  d_nnz - array containing the number of nonzeros in the various rows of the
3495            DIAGONAL portion of the local submatrix (possibly different for each row)
3496            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3497            The size of this array is equal to the number of local rows, i.e 'm'.
3498            You must leave room for the diagonal entry even if it is zero.
3499 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3500            submatrix (same value is used for all local rows).
3501 -  o_nnz - array containing the number of nonzeros in the various rows of the
3502            OFF-DIAGONAL portion of the local submatrix (possibly different for
3503            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3504            structure. The size of this array is equal to the number
3505            of local rows, i.e 'm'.
3506 
3507    Output Parameter:
3508 .  A - the matrix
3509 
3510    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3512    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3513 
3514    Notes:
3515    If the *_nnz parameter is given then the *_nz parameter is ignored
3516 
3517    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3518    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3519    storage requirements for this matrix.
3520 
3521    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3522    processor than it must be used on all processors that share the object for
3523    that argument.
3524 
3525    The user MUST specify either the local or global matrix dimensions
3526    (possibly both).
3527 
3528    The parallel matrix is partitioned across processors such that the
3529    first m0 rows belong to process 0, the next m1 rows belong to
3530    process 1, the next m2 rows belong to process 2 etc.. where
3531    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3532    values corresponding to [m x N] submatrix.
3533 
3534    The columns are logically partitioned with the n0 columns belonging
3535    to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.
3537 
3538    The DIAGONAL portion of the local submatrix on any given processor
3539    is the submatrix corresponding to the rows and columns m,n
3540    corresponding to the given processor. i.e diagonal matrix on
3541    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3542    etc. The remaining portion of the local submatrix [m x (N-n)]
3543    constitute the OFF-DIAGONAL portion. The example below better
3544    illustrates this concept.
3545 
3546    For a square global matrix we define each processor's diagonal portion
3547    to be its local rows and the corresponding columns (a square submatrix);
3548    each processor's off-diagonal portion encompasses the remainder of the
3549    local matrix (a rectangular submatrix).
3550 
3551    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3552 
3553    When calling this routine with a single process communicator, a matrix of
3554    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3555    type of communicator, use the construction mechanism:
3556      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3557 
3558    By default, this format uses inodes (identical nodes) when possible.
3559    We search for consecutive rows with the same nonzero structure, thereby
3560    reusing matrix information to achieve increased efficiency.
3561 
3562    Options Database Keys:
3563 +  -mat_no_inode  - Do not use inodes
3564 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3565 -  -mat_aij_oneindex - Internally use indexing starting at 1
3566         rather than 0.  Note that when calling MatSetValues(),
3567         the user still MUST index entries starting at 0!
3568 
3569 
3570    Example usage:
3571 
3572    Consider the following 8x8 matrix with 34 non-zero values, that is
3573    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3574    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3575    as follows:
3576 
3577 .vb
3578             1  2  0  |  0  3  0  |  0  4
3579     Proc0   0  5  6  |  7  0  0  |  8  0
3580             9  0 10  | 11  0  0  | 12  0
3581     -------------------------------------
3582            13  0 14  | 15 16 17  |  0  0
3583     Proc1   0 18  0  | 19 20 21  |  0  0
3584             0  0  0  | 22 23  0  | 24  0
3585     -------------------------------------
3586     Proc2  25 26 27  |  0  0 28  | 29  0
3587            30  0  0  | 31 32 33  |  0 34
3588 .ve
3589 
3590    This can be represented as a collection of submatrices as:
3591 
3592 .vb
3593       A B C
3594       D E F
3595       G H I
3596 .ve
3597 
3598    Where the submatrices A,B,C are owned by proc0, D,E,F are
3599    owned by proc1, G,H,I are owned by proc2.
3600 
3601    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3602    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3603    The 'M','N' parameters are 8,8, and have the same values on all procs.
3604 
3605    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3606    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3607    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3608    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3609    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3611 
3612    When d_nz, o_nz parameters are specified, d_nz storage elements are
3613    allocated for every row of the local diagonal submatrix, and o_nz
3614    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
3616    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3617    In this case, the values of d_nz,o_nz are:
3618 .vb
3619      proc0 : dnz = 2, o_nz = 2
3620      proc1 : dnz = 3, o_nz = 2
3621      proc2 : dnz = 1, o_nz = 4
3622 .ve
3623    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3624    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
3626    34 values.
3627 
3628    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3630    In the above case the values for d_nnz,o_nnz are:
3631 .vb
3632      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3633      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3634      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3635 .ve
3636    Here the space allocated is sum of all the above values i.e 34, and
3637    hence pre-allocation is perfect.
3638 
3639    Level: intermediate
3640 
3641 .keywords: matrix, aij, compressed row, sparse, parallel
3642 
3643 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3644           MPIAIJ, MatCreateMPIAIJWithArrays()
3645 @*/
3646 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3647 {
3648   PetscErrorCode ierr;
3649   PetscMPIInt    size;
3650 
3651   PetscFunctionBegin;
3652   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3653   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3654   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3655   if (size > 1) {
3656     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3657     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3658   } else {
3659     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3660     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3661   }
3662   PetscFunctionReturn(0);
3663 }
3664 
3665 #undef __FUNCT__
3666 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3667 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3668 {
3669   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3670 
3671   PetscFunctionBegin;
3672   *Ad     = a->A;
3673   *Ao     = a->B;
3674   *colmap = a->garray;
3675   PetscFunctionReturn(0);
3676 }
3677 
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
    Attaches a coloring to both sequential blocks (diagonal a->A and
  off-diagonal a->B) of an MPIAIJ matrix.

    IS_COLORING_GLOBAL: colors are indexed by global column number, so the
  off-diagonal block's colors are obtained by gathering all colors across the
  communicator and indexing with the global column map a->garray.

    IS_COLORING_GHOSTED: colors are indexed by the local (ghosted) numbering,
  so column numbers of each block are translated through A->mapping before
  the colors are looked up.
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    /* gather every process's colors, then select those of our ghost columns via garray */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* the serial coloring handed to the off-diagonal block owns the colors array */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    /* build the diagonal block's global column numbers, then map them to local (ghosted) indices */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    /* garray already holds global column numbers; map them to local (ghosted) indices */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    /* NOTE(review): MPI_COMM_SELF here but PETSC_COMM_SELF above -- both serial; confirm the inconsistency is intentional */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}
3741 
#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/*
    Forwards ADIC-differentiated values to both sequential blocks
  (diagonal then off-diagonal) of the MPIAIJ matrix.
*/
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(aij->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(aij->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
3756 
3757 #undef __FUNCT__
3758 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3759 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3760 {
3761   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3762   PetscErrorCode ierr;
3763 
3764   PetscFunctionBegin;
3765   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3766   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3767   PetscFunctionReturn(0);
3768 }
3769 
3770 #undef __FUNCT__
3771 #define __FUNCT__ "MatMerge"
3772 /*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
3774                  matrices from each processor
3775 
3776     Collective on MPI_Comm
3777 
3778    Input Parameters:
3779 +    comm - the communicators the parallel matrix will live on
3780 .    inmat - the input sequential matrices
3781 .    n - number of local columns (or PETSC_DECIDE)
3782 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3783 
3784    Output Parameter:
3785 .    outmat - the parallel matrix generated
3786 
3787     Level: advanced
3788 
3789    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3790 
3791 @*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  /* each process's sequential input supplies its m rows of the parallel result */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* prefix-sum of local row counts gives this process's first global row */
    ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* MatPreallocateInitialize/Finalize manage the dnz/onz count arrays */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    /* reuse path: only need this process's starting row of the existing matrix */
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* copy the rows of inmat into the parallel matrix */
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii    = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  /* NOTE(review): the input matrix is destroyed here -- callers must not use inmat afterwards */
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
3840 
3841 #undef __FUNCT__
3842 #define __FUNCT__ "MatFileSplit"
3843 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3844 {
3845   PetscErrorCode    ierr;
3846   PetscMPIInt       rank;
3847   PetscInt          m,N,i,rstart,nnz;
3848   size_t            len;
3849   const PetscInt    *indx;
3850   PetscViewer       out;
3851   char              *name;
3852   Mat               B;
3853   const PetscScalar *values;
3854 
3855   PetscFunctionBegin;
3856   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3857   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3858   /* Should this be the type of the diagonal block of A? */
3859   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3860   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3861   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3862   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3863   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3864   for (i=0;i<m;i++) {
3865     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3866     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3867     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3868   }
3869   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3870   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3871 
3872   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3873   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3874   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3875   sprintf(name,"%s.%d",outfile,rank);
3876   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3877   ierr = PetscFree(name);
3878   ierr = MatView(B,out);CHKERRQ(ierr);
3879   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3880   ierr = MatDestroy(B);CHKERRQ(ierr);
3881   PetscFunctionReturn(0);
3882 }
3883 
3884 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3885 #undef __FUNCT__
3886 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3887 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3888 {
3889   PetscErrorCode       ierr;
3890   Mat_Merge_SeqsToMPI  *merge;
3891   PetscContainer       container;
3892 
3893   PetscFunctionBegin;
3894   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3895   if (container) {
3896     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3897     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3898     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3899     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3900     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3901     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3902     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3903     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3904     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3905     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3906     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3907     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3908 
3909     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3910     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3911   }
3912   ierr = PetscFree(merge);CHKERRQ(ierr);
3913 
3914   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3915   PetscFunctionReturn(0);
3916 }
3917 
3918 #include "../src/mat/utils/freespace.h"
3919 #include "petscbt.h"
3920 
3921 #undef __FUNCT__
3922 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3923 /*@C
3924       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3925                  matrices from each processor
3926 
3927     Collective on MPI_Comm
3928 
3929    Input Parameters:
3930 +    comm - the communicators the parallel matrix will live on
3931 .    seqmat - the input sequential matrices
3932 .    m - number of local rows (or PETSC_DECIDE)
3933 .    n - number of local columns (or PETSC_DECIDE)
3934 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3935 
3936    Output Parameter:
3937 .    mpimat - the parallel matrix generated
3938 
3939     Level: advanced
3940 
3941    Notes:
3942      The dimensions of the sequential matrix in each processor MUST be the same.
3943      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3944      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3945 @*/
3946 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3947 {
3948   PetscErrorCode       ierr;
3949   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3950   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3951   PetscMPIInt          size,rank,taga,*len_s;
3952   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
3953   PetscInt             proc,m;
3954   PetscInt             **buf_ri,**buf_rj;
3955   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3956   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3957   MPI_Request          *s_waits,*r_waits;
3958   MPI_Status           *status;
3959   MatScalar            *aa=a->a;
3960   MatScalar            **abuf_r,*ba_i;
3961   Mat_Merge_SeqsToMPI  *merge;
3962   PetscContainer       container;
3963 
3964   PetscFunctionBegin;
3965   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3966 
3967   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3968   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3969 
3970   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3971   if (container) {
3972     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3973   }
3974   bi     = merge->bi;
3975   bj     = merge->bj;
3976   buf_ri = merge->buf_ri;
3977   buf_rj = merge->buf_rj;
3978 
3979   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3980   owners = merge->rowmap.range;
3981   len_s  = merge->len_s;
3982 
3983   /* send and recv matrix values */
3984   /*-----------------------------*/
3985   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3986   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3987 
3988   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3989   for (proc=0,k=0; proc<size; proc++){
3990     if (!len_s[proc]) continue;
3991     i = owners[proc];
3992     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3993     k++;
3994   }
3995 
3996   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3997   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3998   ierr = PetscFree(status);CHKERRQ(ierr);
3999 
4000   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4001   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4002 
4003   /* insert mat values of mpimat */
4004   /*----------------------------*/
4005   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4006   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4007   nextrow = buf_ri_k + merge->nrecv;
4008   nextai  = nextrow + merge->nrecv;
4009 
4010   for (k=0; k<merge->nrecv; k++){
4011     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4012     nrows = *(buf_ri_k[k]);
4013     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4014     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4015   }
4016 
4017   /* set values of ba */
4018   m = merge->rowmap.n;
4019   for (i=0; i<m; i++) {
4020     arow = owners[rank] + i;
4021     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4022     bnzi = bi[i+1] - bi[i];
4023     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4024 
4025     /* add local non-zero vals of this proc's seqmat into ba */
4026     anzi = ai[arow+1] - ai[arow];
4027     aj   = a->j + ai[arow];
4028     aa   = a->a + ai[arow];
4029     nextaj = 0;
4030     for (j=0; nextaj<anzi; j++){
4031       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4032         ba_i[j] += aa[nextaj++];
4033       }
4034     }
4035 
4036     /* add received vals into ba */
4037     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4038       /* i-th row */
4039       if (i == *nextrow[k]) {
4040         anzi = *(nextai[k]+1) - *nextai[k];
4041         aj   = buf_rj[k] + *(nextai[k]);
4042         aa   = abuf_r[k] + *(nextai[k]);
4043         nextaj = 0;
4044         for (j=0; nextaj<anzi; j++){
4045           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4046             ba_i[j] += aa[nextaj++];
4047           }
4048         }
4049         nextrow[k]++; nextai[k]++;
4050       }
4051     }
4052     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4053   }
4054   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4055   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4056 
4057   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4058   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4059   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4060   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4061   PetscFunctionReturn(0);
4062 }
4063 
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
/*
   MatMerge_SeqsToMPISymbolic - Symbolic phase of MatMerge_SeqsToMPI(): exchanges
   the ij-structure of the off-process rows of seqmat, determines the nonzero
   pattern and preallocation of the merged MPIAIJ matrix, and caches the
   communication buffers and row structure in a Mat_Merge_SeqsToMPI context
   attached to the result (queried back via "MatMergeSeqsToMPI") so that
   MatMerge_SeqsToMPINumeric() can fill in the values.  The returned matrix is
   marked unassembled.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode       ierr;
  Mat                  B_mpi;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt             len,proc,*dnz,*onz;
  PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
  merge->rowmap.n = m;
  merge->rowmap.N = M;
  merge->rowmap.bs = 1;
  ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  /* PetscMapSetUp() resolves PETSC_DECIDE values; reload the final sizes */
  m      = merge->rowmap.n;
  M      = merge->rowmap.N;
  owners = merge->rowmap.range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s  = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* only rows with nonzeros are shipped; len_si[proc] becomes the
         compressed i-structure length: 2*(nonempty rows + 1) */
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1] = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = 0;
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;  /* reused below as max row length over all local rows */
  for (i=0;i<m;i++) {
    bnzi   = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow   = owners[rank] + i;
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled     = PETSC_FALSE;
  B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi            = bi;
  merge->bj            = bj;
  merge->buf_ri        = buf_ri;
  merge->buf_rj        = buf_rj;
  merge->coi           = PETSC_NULL;
  merge->coj           = PETSC_NULL;
  merge->owners_co     = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4314 
4315 #undef __FUNCT__
4316 #define __FUNCT__ "MatMerge_SeqsToMPI"
4317 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4318 {
4319   PetscErrorCode   ierr;
4320 
4321   PetscFunctionBegin;
4322   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4323   if (scall == MAT_INITIAL_MATRIX){
4324     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4325   }
4326   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4327   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4328   PetscFunctionReturn(0);
4329 }
4330 
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
     MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar       *aa=a->a,*ba=b->a,*cam;
  PetscScalar     *ca;
  PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* row pointers: local row i holds the union of the diagonal-block (a)
       and off-diagonal-block (b) nonzeros of that row */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    /* merge each row in ascending global column order: the off-diagonal
       columns below cstart, then the diagonal block, then the remainder */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj]; /* cmap translates the compressed B column to a global column */
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* structure is unchanged on reuse: only rewrite the values array,
       walking in exactly the same merge order as above */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4425 
4426 #undef __FUNCT__
4427 #define __FUNCT__ "MatGetLocalMatCondensed"
4428 /*@C
4429      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4430 
4431     Not Collective
4432 
4433    Input Parameters:
4434 +    A - the matrix
4435 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4436 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4437 
4438    Output Parameter:
4439 .    A_loc - the local sequential matrix generated
4440 
4441     Level: developer
4442 
4443 @*/
4444 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4445 {
4446   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4447   PetscErrorCode    ierr;
4448   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4449   IS                isrowa,iscola;
4450   Mat               *aloc;
4451 
4452   PetscFunctionBegin;
4453   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4454   if (!row){
4455     start = A->rmap->rstart; end = A->rmap->rend;
4456     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4457   } else {
4458     isrowa = *row;
4459   }
4460   if (!col){
4461     start = A->cmap->rstart;
4462     cmap  = a->garray;
4463     nzA   = a->A->cmap->n;
4464     nzB   = a->B->cmap->n;
4465     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4466     ncols = 0;
4467     for (i=0; i<nzB; i++) {
4468       if (cmap[i] < start) idx[ncols++] = cmap[i];
4469       else break;
4470     }
4471     imark = i;
4472     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4473     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4474     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4475     ierr = PetscFree(idx);CHKERRQ(ierr);
4476   } else {
4477     iscola = *col;
4478   }
4479   if (scall != MAT_INITIAL_MATRIX){
4480     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4481     aloc[0] = *A_loc;
4482   }
4483   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4484   *A_loc = aloc[0];
4485   ierr = PetscFree(aloc);CHKERRQ(ierr);
4486   if (!row){
4487     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4488   }
4489   if (!col){
4490     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4491   }
4492   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4493   PetscFunctionReturn(0);
4494 }
4495 
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
             (only written for MAT_INITIAL_MATRIX; unchanged on reuse)
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS                isrowb,iscolb;
  Mat               *bseq;

  PetscFunctionBegin;
  /* A's column layout must match B's row layout for A*B-style products */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* build, in ascending order, the global column indices of A that have
       nonzeros: off-diagonal columns below cstart, the local columns, then
       off-diagonal columns above; these select the rows of B to extract */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatGetSubMatrices() expects a caller-supplied matrix array on reuse */
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  /* either return the index sets to the caller for reuse, or clean them up */
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4570 
4571 #undef __FUNCT__
4572 #define __FUNCT__ "MatGetBrowsOfAoCols"
4573 /*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A
4576 
4577     Collective on Mat
4578 
4579    Input Parameters:
4580 +    A,B - the matrices in mpiaij format
4581 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4582 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4583 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4584 
   Output Parameter:
.    B_oth - the sequential matrix generated
4587 
4588     Level: developer
4589 
4590 @*/
4591 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4592 {
4593   VecScatter_MPI_General *gen_to,*gen_from;
4594   PetscErrorCode         ierr;
4595   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4596   Mat_SeqAIJ             *b_oth;
4597   VecScatter             ctx=a->Mvctx;
4598   MPI_Comm               comm=((PetscObject)ctx)->comm;
4599   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4600   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4601   PetscScalar            *rvalues,*svalues;
4602   MatScalar              *b_otha,*bufa,*bufA;
4603   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4604   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4605   MPI_Status             *sstatus,rstatus;
4606   PetscMPIInt            jj;
4607   PetscInt               *cols,sbs,rbs;
4608   PetscScalar            *vals;
4609 
4610   PetscFunctionBegin;
4611   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4612     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4613   }
4614   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4615   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4616 
4617   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4618   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4619   rvalues  = gen_from->values; /* holds the length of receiving row */
4620   svalues  = gen_to->values;   /* holds the length of sending row */
4621   nrecvs   = gen_from->n;
4622   nsends   = gen_to->n;
4623 
4624   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4625   srow     = gen_to->indices;   /* local row index to be sent */
4626   sstarts  = gen_to->starts;
4627   sprocs   = gen_to->procs;
4628   sstatus  = gen_to->sstatus;
4629   sbs      = gen_to->bs;
4630   rstarts  = gen_from->starts;
4631   rprocs   = gen_from->procs;
4632   rbs      = gen_from->bs;
4633 
4634   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4635   if (scall == MAT_INITIAL_MATRIX){
4636     /* i-array */
4637     /*---------*/
4638     /*  post receives */
4639     for (i=0; i<nrecvs; i++){
4640       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4641       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4642       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4643     }
4644 
4645     /* pack the outgoing message */
4646     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4647     rstartsj = sstartsj + nsends +1;
4648     sstartsj[0] = 0;  rstartsj[0] = 0;
4649     len = 0; /* total length of j or a array to be sent */
4650     k = 0;
4651     for (i=0; i<nsends; i++){
4652       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4653       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4654       for (j=0; j<nrows; j++) {
4655         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4656         for (l=0; l<sbs; l++){
4657           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4658           rowlen[j*sbs+l] = ncols;
4659           len += ncols;
4660           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4661         }
4662         k++;
4663       }
4664       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4665       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4666     }
4667     /* recvs and sends of i-array are completed */
4668     i = nrecvs;
4669     while (i--) {
4670       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4671     }
4672     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4673 
4674     /* allocate buffers for sending j and a arrays */
4675     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4676     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4677 
4678     /* create i-array of B_oth */
4679     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4680     b_othi[0] = 0;
4681     len = 0; /* total length of j or a array to be received */
4682     k = 0;
4683     for (i=0; i<nrecvs; i++){
4684       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4685       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4686       for (j=0; j<nrows; j++) {
4687         b_othi[k+1] = b_othi[k] + rowlen[j];
4688         len += rowlen[j]; k++;
4689       }
4690       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4691     }
4692 
4693     /* allocate space for j and a arrrays of B_oth */
4694     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4695     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4696 
4697     /* j-array */
4698     /*---------*/
4699     /*  post receives of j-array */
4700     for (i=0; i<nrecvs; i++){
4701       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4702       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4703     }
4704 
4705     /* pack the outgoing message j-array */
4706     k = 0;
4707     for (i=0; i<nsends; i++){
4708       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4709       bufJ = bufj+sstartsj[i];
4710       for (j=0; j<nrows; j++) {
4711         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4712         for (ll=0; ll<sbs; ll++){
4713           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4714           for (l=0; l<ncols; l++){
4715             *bufJ++ = cols[l];
4716           }
4717           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4718         }
4719       }
4720       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4721     }
4722 
4723     /* recvs and sends of j-array are completed */
4724     i = nrecvs;
4725     while (i--) {
4726       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4727     }
4728     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4729   } else if (scall == MAT_REUSE_MATRIX){
4730     sstartsj = *startsj;
4731     rstartsj = sstartsj + nsends +1;
4732     bufa     = *bufa_ptr;
4733     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4734     b_otha   = b_oth->a;
4735   } else {
4736     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4737   }
4738 
4739   /* a-array */
4740   /*---------*/
4741   /*  post receives of a-array */
4742   for (i=0; i<nrecvs; i++){
4743     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4744     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4745   }
4746 
4747   /* pack the outgoing message a-array */
4748   k = 0;
4749   for (i=0; i<nsends; i++){
4750     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4751     bufA = bufa+sstartsj[i];
4752     for (j=0; j<nrows; j++) {
4753       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4754       for (ll=0; ll<sbs; ll++){
4755         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4756         for (l=0; l<ncols; l++){
4757           *bufA++ = vals[l];
4758         }
4759         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4760       }
4761     }
4762     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4763   }
4764   /* recvs and sends of a-array are completed */
4765   i = nrecvs;
4766   while (i--) {
4767     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4768   }
4769   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4770   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4771 
4772   if (scall == MAT_INITIAL_MATRIX){
4773     /* put together the new matrix */
4774     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4775 
4776     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4777     /* Since these are PETSc arrays, change flags to free them as necessary. */
4778     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4779     b_oth->free_a  = PETSC_TRUE;
4780     b_oth->free_ij = PETSC_TRUE;
4781     b_oth->nonew   = 0;
4782 
4783     ierr = PetscFree(bufj);CHKERRQ(ierr);
4784     if (!startsj || !bufa_ptr){
4785       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4786       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4787     } else {
4788       *startsj  = sstartsj;
4789       *bufa_ptr = bufa;
4790     }
4791   }
4792   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4793   PetscFunctionReturn(0);
4794 }
4795 
4796 #undef __FUNCT__
4797 #define __FUNCT__ "MatGetCommunicationStructs"
4798 /*@C
4799   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4800 
4801   Not Collective
4802 
4803   Input Parameters:
4804 . A - The matrix in mpiaij format
4805 
  Output Parameters:
4807 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4808 . colmap - A map from global column index to local index into lvec
4809 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4810 
4811   Level: developer
4812 
4813 @*/
4814 #if defined (PETSC_USE_CTABLE)
4815 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4816 #else
4817 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4818 #endif
4819 {
4820   Mat_MPIAIJ *a;
4821 
4822   PetscFunctionBegin;
4823   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4824   PetscValidPointer(lvec, 2)
4825   PetscValidPointer(colmap, 3)
4826   PetscValidPointer(multScatter, 4)
4827   a = (Mat_MPIAIJ *) A->data;
4828   if (lvec) *lvec = a->lvec;
4829   if (colmap) *colmap = a->colmap;
4830   if (multScatter) *multScatter = a->Mvctx;
4831   PetscFunctionReturn(0);
4832 }
4833 
4834 EXTERN_C_BEGIN
4835 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4836 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4837 EXTERN_C_END
4838 
4839 #include "../src/mat/impls/dense/mpi/mpidense.h"
4840 
4841 #undef __FUNCT__
4842 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4843 /*
4844     Computes (B'*A')' since computing B*A directly is untenable
4845 
4846                n                       p                          p
4847         (              )       (              )         (                  )
4848       m (      A       )  *  n (       B      )   =   m (         C        )
4849         (              )       (              )         (                  )
4850 
4851 */
4852 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4853 {
4854   PetscErrorCode     ierr;
4855   Mat                At,Bt,Ct;
4856 
4857   PetscFunctionBegin;
4858   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4859   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4860   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4861   ierr = MatDestroy(At);CHKERRQ(ierr);
4862   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4863   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4864   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4865   PetscFunctionReturn(0);
4866 }
4867 
4868 #undef __FUNCT__
4869 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4870 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4871 {
4872   PetscErrorCode ierr;
4873   PetscInt       m=A->rmap->n,n=B->cmap->n;
4874   Mat            Cmat;
4875 
4876   PetscFunctionBegin;
4877   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4878   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4879   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4880   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4881   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4882   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4883   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4884   *C   = Cmat;
4885   PetscFunctionReturn(0);
4886 }
4887 
4888 /* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
/*
   Driver for C = A*B with A MPIDense and B MPIAIJ: runs the symbolic stage
   (creation/preallocation of C) only on the first call, then the numeric stage.
*/
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    /* first call: allocate the product matrix C */
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  /* compute (or, for MAT_REUSE_MATRIX, recompute) the entries of C */
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4902 
4903 EXTERN_C_BEGIN
4904 #if defined(PETSC_HAVE_MUMPS)
4905 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4906 #endif
4907 #if defined(PETSC_HAVE_PASTIX)
4908 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4909 #endif
4910 #if defined(PETSC_HAVE_SUPERLU_DIST)
4911 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4912 #endif
4913 #if defined(PETSC_HAVE_SPOOLES)
4914 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4915 #endif
4916 EXTERN_C_END
4917 
4918 /*MC
4919    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4920 
4921    Options Database Keys:
4922 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4923 
4924   Level: beginner
4925 
4926 .seealso: MatCreateMPIAIJ()
4927 M*/
4928 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   Constructor for the MPIAIJ matrix type: installs the function table,
   initializes the per-matrix data structure, and registers the composed
   (dynamically looked up) operations and external solver interfaces.
   Called by MatSetType()/MatCreate(); preallocation happens later.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  /* allocate the type-specific data and install the MPIAIJ method table */
  ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs      = 1;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;      /* global-to-local column map, built lazily */
  b->garray      = 0;      /* global indices of off-diagonal columns, built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* register external direct-solver factorization interfaces when configured */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
                                     "MatGetFactor_mpiaij_mumps",
                                     MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
                                     "MatGetFactor_mpiaij_pastix",
                                     MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  /* compose the type-specific operations that are looked up by name at runtime */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                      MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                      MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                      MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                      MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
5028 
5029 #undef __FUNCT__
5030 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5031 /*@
5032      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5033          and "off-diagonal" part of the matrix in CSR format.
5034 
5035    Collective on MPI_Comm
5036 
5037    Input Parameters:
5038 +  comm - MPI communicator
5039 .  m - number of local rows (Cannot be PETSC_DECIDE)
5040 .  n - This value should be the same as the local size used in creating the
5041        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5042        calculated if N is given) For square matrices n is almost always m.
5043 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5044 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5045 .   i - row indices for "diagonal" portion of matrix
5046 .   j - column indices
5047 .   a - matrix values
5048 .   oi - row indices for "off-diagonal" portion of matrix
5049 .   oj - column indices
5050 -   oa - matrix values
5051 
5052    Output Parameter:
5053 .   mat - the matrix
5054 
5055    Level: advanced
5056 
5057    Notes:
5058        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5059 
5060        The i and j indices are 0 based
5061 
5062        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5063 
5064 
5065 .keywords: matrix, aij, compressed row, sparse, parallel
5066 
5067 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5068           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5069 @*/
5070 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5071 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5072 {
5073   PetscErrorCode ierr;
5074   Mat_MPIAIJ     *maij;
5075 
5076  PetscFunctionBegin;
5077   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5078   if (i[0]) {
5079     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5080   }
5081   if (oi[0]) {
5082     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5083   }
5084   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5085   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5086   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5087   maij = (Mat_MPIAIJ*) (*mat)->data;
5088   maij->donotstash     = PETSC_TRUE;
5089   (*mat)->preallocated = PETSC_TRUE;
5090 
5091   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5092   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5093   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5094   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5095 
5096   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5097   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5098 
5099   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5100   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5101   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5102   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5103 
5104   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5105   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5106   PetscFunctionReturn(0);
5107 }
5108 
/*
    Special version for direct calls from Fortran
*/
/* map the C symbol to the Fortran compiler's name-mangling convention */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so can be used in void function */
/* The standard macros do "return ierr", which is illegal inside the void
   Fortran stub below, so on error we abort on the matrix's communicator instead. */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5125 
5126 EXTERN_C_BEGIN
5127 #undef __FUNCT__
5128 #define __FUNCT__ "matsetvaluesmpiaij_"
5129 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5130 {
5131   Mat             mat = *mmat;
5132   PetscInt        m = *mm, n = *mn;
5133   InsertMode      addv = *maddv;
5134   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5135   PetscScalar     value;
5136   PetscErrorCode  ierr;
5137 
5138   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5139   if (mat->insertmode == NOT_SET_VALUES) {
5140     mat->insertmode = addv;
5141   }
5142 #if defined(PETSC_USE_DEBUG)
5143   else if (mat->insertmode != addv) {
5144     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5145   }
5146 #endif
5147   {
5148   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5149   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5150   PetscTruth      roworiented = aij->roworiented;
5151 
5152   /* Some Variables required in the macro */
5153   Mat             A = aij->A;
5154   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5155   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5156   MatScalar       *aa = a->a;
5157   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5158   Mat             B = aij->B;
5159   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5160   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5161   MatScalar       *ba = b->a;
5162 
5163   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5164   PetscInt        nonew = a->nonew;
5165   MatScalar       *ap1,*ap2;
5166 
5167   PetscFunctionBegin;
5168   for (i=0; i<m; i++) {
5169     if (im[i] < 0) continue;
5170 #if defined(PETSC_USE_DEBUG)
5171     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5172 #endif
5173     if (im[i] >= rstart && im[i] < rend) {
5174       row      = im[i] - rstart;
5175       lastcol1 = -1;
5176       rp1      = aj + ai[row];
5177       ap1      = aa + ai[row];
5178       rmax1    = aimax[row];
5179       nrow1    = ailen[row];
5180       low1     = 0;
5181       high1    = nrow1;
5182       lastcol2 = -1;
5183       rp2      = bj + bi[row];
5184       ap2      = ba + bi[row];
5185       rmax2    = bimax[row];
5186       nrow2    = bilen[row];
5187       low2     = 0;
5188       high2    = nrow2;
5189 
5190       for (j=0; j<n; j++) {
5191         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5192         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5193         if (in[j] >= cstart && in[j] < cend){
5194           col = in[j] - cstart;
5195           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5196         } else if (in[j] < 0) continue;
5197 #if defined(PETSC_USE_DEBUG)
5198         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5199 #endif
5200         else {
5201           if (mat->was_assembled) {
5202             if (!aij->colmap) {
5203               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5204             }
5205 #if defined (PETSC_USE_CTABLE)
5206             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5207 	    col--;
5208 #else
5209             col = aij->colmap[in[j]] - 1;
5210 #endif
5211             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5212               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5213               col =  in[j];
5214               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5215               B = aij->B;
5216               b = (Mat_SeqAIJ*)B->data;
5217               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5218               rp2      = bj + bi[row];
5219               ap2      = ba + bi[row];
5220               rmax2    = bimax[row];
5221               nrow2    = bilen[row];
5222               low2     = 0;
5223               high2    = nrow2;
5224               bm       = aij->B->rmap->n;
5225               ba = b->a;
5226             }
5227           } else col = in[j];
5228           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5229         }
5230       }
5231     } else {
5232       if (!aij->donotstash) {
5233         if (roworiented) {
5234           if (ignorezeroentries && v[i*n] == 0.0) continue;
5235           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5236         } else {
5237           if (ignorezeroentries && v[i] == 0.0) continue;
5238           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5239         }
5240       }
5241     }
5242   }}
5243   PetscFunctionReturnVoid();
5244 }
5245 EXTERN_C_END
5246 
5247