xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 88170d3c014dde9f19b263fccd87ca27ccefd46a)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "../src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
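    /* rowners[1..size] now holds each process's local row count; the prefix sum below converts these counts into global row ownership ranges */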
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to the other processes */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the diagonal-block and off-diagonal-block nonzero counts for each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
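      /* ld[i] will count the nonzeros of row i that lie to the left of this process's diagonal block; it is stored in the Mat_MPIAIJ below so the MAT_REUSE_MATRIX branch can split incoming values between the diagonal (A) and off-diagonal (B) parts */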
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the diagonal-block and off-diagonal-block nonzero counts for each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it, it is not scalable (each process
181 keeps an order-N integer array, but access is fast).
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
207 #define CHUNKSIZE   15
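/* The two macros below insert a single value into one row of the diagonal (A) or off-diagonal (B) block: a short binary search narrows the search range, a linear scan locates the column, and MatSeqXAIJReallocateAIJ() grows the row storage when a new nonzero must be inserted. */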
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
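  /* v[] is expected to hold the entire row in global column order: the entries left of the diagonal block, then the diagonal block, then the entries to its right */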
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some variables required by the macros below */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
415   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
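    /* values set on other processes for rows owned here were communicated by MatAssemblyBegin(); receive them from the stash and insert them one row at a time */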
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled; if so, we must
517      also disassemble ourselves so that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
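  /* each process forwards the global row indices it was given to the owning processes, collects the rows it owns into lrows[], and then zeroes those local rows below */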
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of number of messages and max length*/
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal, we use separate
673      code so that MatSetValues() is not called for each diagonal entry;
674      doing so would allocate new memory, causing many mallocs and slowing things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
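  /* overlap communication and computation: start scattering the off-process entries of xx into lvec, apply the diagonal block, complete the scatter, then add the off-diagonal block contribution */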
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts: note this assumes the values are not actually */
764     /* added into yy until the VecScatterEnd() on the next line */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* values actually were received in the Begin() but we need to call this nop */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
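  /* the binary file begins with a four-entry header: the matrix file cookie, the global row and column counts, and the total number of nonzeros (reduced from all processes onto process 0) */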
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap->N;
932     header[2] = mat->cmap->N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap->n;
938     range = mat->rmap->range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap->n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap->n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
968   nzmax = nz; /* process 0 needs as much space as the largest process needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap->n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap->n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto the first processor */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap->N > 1024) {
1114       PetscTruth flg = PETSC_FALSE;
1115 
1116       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1117       if (!flg) {
1118         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1119       }
1120     }
1121 
1122     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1123     if (!rank) {
1124       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1125     } else {
1126       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1127     }
1128     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1129     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1130     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1131     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1132 
1133     /* copy over the A part */
1134     Aloc = (Mat_SeqAIJ*)aij->A->data;
1135     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1136     row = mat->rmap->rstart;
1137     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1138     for (i=0; i<m; i++) {
1139       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1140       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1141     }
1142     aj = Aloc->j;
1143     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1144 
1145     /* copy over the B part */
1146     Aloc = (Mat_SeqAIJ*)aij->B->data;
1147     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1148     row  = mat->rmap->rstart;
1149     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1150     ct   = cols;
1151     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1155     }
1156     ierr = PetscFree(ct);CHKERRQ(ierr);
1157     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1158     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1159     /*
1160        Everyone has to call to draw the matrix since the graphics waits are
1161        synchronized across all processors that share the PetscDraw object
1162     */
1163     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1164     if (!rank) {
1165       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1166       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1167     }
1168     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1169     ierr = MatDestroy(A);CHKERRQ(ierr);
1170   }
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatView_MPIAIJ"
1176 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1177 {
1178   PetscErrorCode ierr;
1179   PetscTruth     iascii,isdraw,issocket,isbinary;
1180 
1181   PetscFunctionBegin;
1182   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1183   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1184   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1185   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1186   if (iascii || isdraw || isbinary || issocket) {
1187     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1188   } else {
1189     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1190   }
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatRelax_MPIAIJ"
1196 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1197 {
1198   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1199   PetscErrorCode ierr;
1200   Vec            bb1;
1201 
1202   PetscFunctionBegin;
1203   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
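  /* only the local (processor-block) sweeps are supported: each iteration scatters the current solution, forms bb1 = bb - B*x_offproc from the off-diagonal block, and relaxes the local diagonal block */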
1204 
1205   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1206     if (flag & SOR_ZERO_INITIAL_GUESS) {
1207       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1208       its--;
1209     }
1210 
1211     while (its--) {
1212       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1213       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1214 
1215       /* update rhs: bb1 = bb - B*x */
1216       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1217       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1218 
1219       /* local sweep */
1220       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1221     }
1222   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1223     if (flag & SOR_ZERO_INITIAL_GUESS) {
1224       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1225       its--;
1226     }
1227     while (its--) {
1228       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230 
1231       /* update rhs: bb1 = bb - B*x */
1232       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1233       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1234 
1235       /* local sweep */
1236       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1237     }
1238   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1239     if (flag & SOR_ZERO_INITIAL_GUESS) {
1240       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1241       its--;
1242     }
1243     while (its--) {
1244       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246 
1247       /* update rhs: bb1 = bb - B*x */
1248       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1249       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1250 
1251       /* local sweep */
1252       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1253     }
1254   } else {
1255     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1256   }
1257 
1258   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatPermute_MPIAIJ"
1264 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1265 {
1266   MPI_Comm       comm,pcomm;
1267   PetscInt       first,local_size,nrows;
1268   const PetscInt *rows;
1269   PetscMPIInt    size;
1270   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1275   /* make a collective version of 'rowp' */
1276   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1277   if (pcomm==comm) {
1278     crowp = rowp;
1279   } else {
1280     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1281     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1282     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1283     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1284   }
1285   /* collect the global row permutation and invert it */
1286   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1287   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1288   if (pcomm!=comm) {
1289     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1290   }
1291   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1292   /* get the local target indices */
1293   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1294   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1295   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1296   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1297   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1298   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1299   /* the column permutation is simpler than the row permutation:
1300      make a local version of 'colp' and invert it */
1301   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1302   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1303   if (size==1) {
1304     lcolp = colp;
1305   } else {
1306     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1307     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1308     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1309   }
1310   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1311   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1312   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1313   if (size>1) {
1314     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1315     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1316   }
1317   /* now we just get the submatrix */
1318   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1319   /* clean up */
1320   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1321   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatGetInfo_MPIAIJ"
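/*
    Assembles matrix statistics by adding the MAT_LOCAL information of the diagonal
    block A and the off-diagonal block B; for MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the
    five floating point fields are further reduced over the matrix communicator.
*/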
1327 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1328 {
1329   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1330   Mat            A = mat->A,B = mat->B;
1331   PetscErrorCode ierr;
1332   PetscReal      isend[5],irecv[5];
1333 
1334   PetscFunctionBegin;
1335   info->block_size     = 1.0;
1336   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1337   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1338   isend[3] = info->memory;  isend[4] = info->mallocs;
1339   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1340   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1341   isend[3] += info->memory;  isend[4] += info->mallocs;
1342   if (flag == MAT_LOCAL) {
1343     info->nz_used      = isend[0];
1344     info->nz_allocated = isend[1];
1345     info->nz_unneeded  = isend[2];
1346     info->memory       = isend[3];
1347     info->mallocs      = isend[4];
1348   } else if (flag == MAT_GLOBAL_MAX) {
1349     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1350     info->nz_used      = irecv[0];
1351     info->nz_allocated = irecv[1];
1352     info->nz_unneeded  = irecv[2];
1353     info->memory       = irecv[3];
1354     info->mallocs      = irecv[4];
1355   } else if (flag == MAT_GLOBAL_SUM) {
1356     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1357     info->nz_used      = irecv[0];
1358     info->nz_allocated = irecv[1];
1359     info->nz_unneeded  = irecv[2];
1360     info->memory       = irecv[3];
1361     info->mallocs      = irecv[4];
1362   }
1363   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1364   info->fill_ratio_needed = 0;
1365   info->factor_mallocs    = 0;
1366 
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatSetOption_MPIAIJ"
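/*
    Most options are simply forwarded to the two sequential blocks.  Options that only
    make sense for the parallel matrix as a whole (MAT_IGNORE_OFF_PROC_ENTRIES) or for
    the diagonal block only (the symmetry related options) are handled separately.
*/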
1372 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1373 {
1374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   switch (op) {
1379   case MAT_NEW_NONZERO_LOCATIONS:
1380   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1381   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1382   case MAT_KEEP_ZEROED_ROWS:
1383   case MAT_NEW_NONZERO_LOCATION_ERR:
1384   case MAT_USE_INODES:
1385   case MAT_IGNORE_ZERO_ENTRIES:
1386     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1387     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1388     break;
1389   case MAT_ROW_ORIENTED:
1390     a->roworiented = flg;
1391     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1392     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1393     break;
1394   case MAT_NEW_DIAGONALS:
1395     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1396     break;
1397   case MAT_IGNORE_OFF_PROC_ENTRIES:
1398     a->donotstash = PETSC_TRUE;
1399     break;
1400   case MAT_SYMMETRIC:
1401     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1402     break;
1403   case MAT_STRUCTURALLY_SYMMETRIC:
1404   case MAT_HERMITIAN:
1405   case MAT_SYMMETRY_ETERNAL:
1406     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1407     break;
1408   default:
1409     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1410   }
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatGetRow_MPIAIJ"
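/*
    Returns one locally owned row in global column numbering.  The row is obtained by
    merging the corresponding rows of the diagonal block (columns shifted by
    cmap->rstart) and of the off-diagonal block (columns translated through garray),
    assuming both blocks keep their columns sorted.  Callers must pair this with
    MatRestoreRow().
*/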
1416 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1417 {
1418   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1419   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1420   PetscErrorCode ierr;
1421   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1422   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1423   PetscInt       *cmap,*idx_p;
1424 
1425   PetscFunctionBegin;
1426   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1427   mat->getrowactive = PETSC_TRUE;
1428 
1429   if (!mat->rowvalues && (idx || v)) {
1430     /*
1431         allocate enough space to hold information from the longest row.
1432     */
1433     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1434     PetscInt     max = 1,tmp;
1435     for (i=0; i<matin->rmap->n; i++) {
1436       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1437       if (max < tmp) { max = tmp; }
1438     }
1439     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1440     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1441   }
1442 
1443   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1444   lrow = row - rstart;
1445 
1446   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1447   if (!v)   {pvA = 0; pvB = 0;}
1448   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1449   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1450   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1451   nztot = nzA + nzB;
1452 
1453   cmap  = mat->garray;
1454   if (v  || idx) {
1455     if (nztot) {
1456       /* Sort by increasing column numbers, assuming A and B already sorted */
1457       PetscInt imark = -1;
1458       if (v) {
1459         *v = v_p = mat->rowvalues;
1460         for (i=0; i<nzB; i++) {
1461           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1462           else break;
1463         }
1464         imark = i;
1465         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1466         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1467       }
1468       if (idx) {
1469         *idx = idx_p = mat->rowindices;
1470         if (imark > -1) {
1471           for (i=0; i<imark; i++) {
1472             idx_p[i] = cmap[cworkB[i]];
1473           }
1474         } else {
1475           for (i=0; i<nzB; i++) {
1476             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1477             else break;
1478           }
1479           imark = i;
1480         }
1481         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1482         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1483       }
1484     } else {
1485       if (idx) *idx = 0;
1486       if (v)   *v   = 0;
1487     }
1488   }
1489   *nz = nztot;
1490   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1491   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1497 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1498 {
1499   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1500 
1501   PetscFunctionBegin;
1502   if (!aij->getrowactive) {
1503     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1504   }
1505   aij->getrowactive = PETSC_FALSE;
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatNorm_MPIAIJ"
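/*
    NORM_FROBENIUS: sum of squares of the local A and B entries, MPI_SUM reduction,
    square root.  NORM_1: per-column sums of absolute values accumulated in an array
    of length cmap->N, added across processes, maximum taken.  NORM_INFINITY: local
    row sums of absolute values, MPI_MAX reduction.  NORM_2 is not supported.
*/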
1511 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1512 {
1513   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1514   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1515   PetscErrorCode ierr;
1516   PetscInt       i,j,cstart = mat->cmap->rstart;
1517   PetscReal      sum = 0.0;
1518   MatScalar      *v;
1519 
1520   PetscFunctionBegin;
1521   if (aij->size == 1) {
1522     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1523   } else {
1524     if (type == NORM_FROBENIUS) {
1525       v = amat->a;
1526       for (i=0; i<amat->nz; i++) {
1527 #if defined(PETSC_USE_COMPLEX)
1528         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1529 #else
1530         sum += (*v)*(*v); v++;
1531 #endif
1532       }
1533       v = bmat->a;
1534       for (i=0; i<bmat->nz; i++) {
1535 #if defined(PETSC_USE_COMPLEX)
1536         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1537 #else
1538         sum += (*v)*(*v); v++;
1539 #endif
1540       }
1541       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1542       *norm = sqrt(*norm);
1543     } else if (type == NORM_1) { /* max column norm */
1544       PetscReal *tmp,*tmp2;
1545       PetscInt  *jj,*garray = aij->garray;
1546       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1547       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1548       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1549       *norm = 0.0;
1550       v = amat->a; jj = amat->j;
1551       for (j=0; j<amat->nz; j++) {
1552         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1553       }
1554       v = bmat->a; jj = bmat->j;
1555       for (j=0; j<bmat->nz; j++) {
1556         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1557       }
1558       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1559       for (j=0; j<mat->cmap->N; j++) {
1560         if (tmp2[j] > *norm) *norm = tmp2[j];
1561       }
1562       ierr = PetscFree(tmp);CHKERRQ(ierr);
1563       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1564     } else if (type == NORM_INFINITY) { /* max row norm */
1565       PetscReal ntemp = 0.0;
1566       for (j=0; j<aij->A->rmap->n; j++) {
1567         v = amat->a + amat->i[j];
1568         sum = 0.0;
1569         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1570           sum += PetscAbsScalar(*v); v++;
1571         }
1572         v = bmat->a + bmat->i[j];
1573         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1574           sum += PetscAbsScalar(*v); v++;
1575         }
1576         if (sum > ntemp) ntemp = sum;
1577       }
1578       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1579     } else {
1580       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1581     }
1582   }
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatTranspose_MPIAIJ"
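/*
    Builds the transpose by inserting every local row of A and B as a column of the
    result with MatSetValues().  The preallocation counts d_nnz are computed from the
    diagonal block only and reused for o_nnz to avoid communication, so additional
    mallocs may occur during assembly for matrices with a large off-diagonal part.
*/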
1588 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1589 {
1590   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1591   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1592   PetscErrorCode ierr;
1593   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1594   PetscInt       cstart=A->cmap->rstart,ncol;
1595   Mat            B;
1596   MatScalar      *array;
1597 
1598   PetscFunctionBegin;
1599   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1600 
1601   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1602   ai = Aloc->i; aj = Aloc->j;
1603   bi = Bloc->i; bj = Bloc->j;
1604   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1605     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1606     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1607     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1608     for (i=0; i<ai[ma]; i++){
1609       d_nnz[aj[i]] ++;
1610       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1611     }
1612 
1613     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1614     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1615     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1616     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1617     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1618   } else {
1619     B = *matout;
1620   }
1621 
1622   /* copy over the A part */
1623   array = Aloc->a;
1624   row = A->rmap->rstart;
1625   for (i=0; i<ma; i++) {
1626     ncol = ai[i+1]-ai[i];
1627     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1628     row++; array += ncol; aj += ncol;
1629   }
1630   aj = Aloc->j;
1631   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
1632 
1633   /* copy over the B part */
1634   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1635   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1636   array = Bloc->a;
1637   row = A->rmap->rstart;
1638   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1639   cols_tmp = cols;
1640   for (i=0; i<mb; i++) {
1641     ncol = bi[i+1]-bi[i];
1642     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1643     row++; array += ncol; cols_tmp += ncol;
1644   }
1645   ierr = PetscFree(cols);CHKERRQ(ierr);
1646 
1647   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1648   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1649   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1650     *matout = B;
1651   } else {
1652     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1653   }
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
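/*
    Computes mat = diag(ll)*mat*diag(rr).  Left scaling is local and applied to both
    blocks directly; right scaling of the off-diagonal block needs the ghosted values
    of rr, so the scatter into lvec is started first and completed only after the
    diagonal block has been scaled, overlapping communication with computation.
*/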
1659 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1660 {
1661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1662   Mat            a = aij->A,b = aij->B;
1663   PetscErrorCode ierr;
1664   PetscInt       s1,s2,s3;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1668   if (rr) {
1669     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1670     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1671     /* Overlap communication with computation. */
1672     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1673   }
1674   if (ll) {
1675     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1676     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1677     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1678   }
1679   /* scale  the diagonal block */
1680   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1681 
1682   if (rr) {
1683     /* Do a scatter end and then right scale the off-diagonal block */
1684     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1685     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1686   }
1687 
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1693 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1694 {
1695   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1696   PetscErrorCode ierr;
1697 
1698   PetscFunctionBegin;
1699   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1700   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1705 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1706 {
1707   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1708   PetscErrorCode ierr;
1709 
1710   PetscFunctionBegin;
1711   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatEqual_MPIAIJ"
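/*
    Two MPIAIJ matrices are reported equal when, on every process, both the diagonal
    and the off-diagonal blocks compare equal; the local results are combined with a
    logical AND over the communicator.
*/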
1717 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1718 {
1719   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1720   Mat            a,b,c,d;
1721   PetscTruth     flg;
1722   PetscErrorCode ierr;
1723 
1724   PetscFunctionBegin;
1725   a = matA->A; b = matA->B;
1726   c = matB->A; d = matB->B;
1727 
1728   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1729   if (flg) {
1730     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1731   }
1732   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatCopy_MPIAIJ"
1738 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1739 {
1740   PetscErrorCode ierr;
1741   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1742   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1743 
1744   PetscFunctionBegin;
1745   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1746   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1747     /* Because of the column compression in the off-processor part of the matrix a->B,
1748        the number of columns in a->B and b->B may be different, hence we cannot call
1749        MatCopy() directly on the two parts. If need be, a copy more efficient than
1750        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1751        then copying the submatrices */
1752     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1753   } else {
1754     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1755     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1762 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1763 {
1764   PetscErrorCode ierr;
1765 
1766   PetscFunctionBegin;
1767   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 #include "petscblaslapack.h"
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatAXPY_MPIAIJ"
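/*
    Computes Y = a*X + Y.  With SAME_NONZERO_PATTERN the value arrays of matching
    blocks line up entry by entry and BLAS axpy is applied directly.  With
    SUBSET_NONZERO_PATTERN a mapping (xtoy) from X's nonzeros into Y's is built once,
    cached on Y's off-diagonal block, and reused on later calls.  Any other structure
    falls back to MatAXPY_Basic().
*/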
1774 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1775 {
1776   PetscErrorCode ierr;
1777   PetscInt       i;
1778   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1779   PetscBLASInt   bnz,one=1;
1780   Mat_SeqAIJ     *x,*y;
1781 
1782   PetscFunctionBegin;
1783   if (str == SAME_NONZERO_PATTERN) {
1784     PetscScalar alpha = a;
1785     x = (Mat_SeqAIJ *)xx->A->data;
1786     y = (Mat_SeqAIJ *)yy->A->data;
1787     bnz = PetscBLASIntCast(x->nz);
1788     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1789     x = (Mat_SeqAIJ *)xx->B->data;
1790     y = (Mat_SeqAIJ *)yy->B->data;
1791     bnz = PetscBLASIntCast(x->nz);
1792     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1793   } else if (str == SUBSET_NONZERO_PATTERN) {
1794     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1795 
1796     x = (Mat_SeqAIJ *)xx->B->data;
1797     y = (Mat_SeqAIJ *)yy->B->data;
1798     if (y->xtoy && y->XtoY != xx->B) {
1799       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1800       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1801     }
1802     if (!y->xtoy) { /* get xtoy */
1803       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1804       y->XtoY = xx->B;
1805       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1806     }
1807     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1808   } else {
1809     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1810   }
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1815 
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatConjugate_MPIAIJ"
1818 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1819 {
1820 #if defined(PETSC_USE_COMPLEX)
1821   PetscErrorCode ierr;
1822   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1823 
1824   PetscFunctionBegin;
1825   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1826   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1827 #else
1828   PetscFunctionBegin;
1829 #endif
1830   PetscFunctionReturn(0);
1831 }
1832 
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatRealPart_MPIAIJ"
1835 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1836 {
1837   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1838   PetscErrorCode ierr;
1839 
1840   PetscFunctionBegin;
1841   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1842   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1848 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1849 {
1850   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1851   PetscErrorCode ierr;
1852 
1853   PetscFunctionBegin;
1854   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1855   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 #ifdef PETSC_HAVE_PBGL
1860 
1861 #include <boost/parallel/mpi/bsp_process_group.hpp>
1862 #include <boost/graph/distributed/ilu_default_graph.hpp>
1863 #include <boost/graph/distributed/ilu_0_block.hpp>
1864 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1865 #include <boost/graph/distributed/petsc/interface.hpp>
1866 #include <boost/multi_array.hpp>
1867 #include <boost/parallel/distributed_property_map.hpp>
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1871 /*
1872   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1873 */
1874 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1875 {
1876   namespace petsc = boost::distributed::petsc;
1877 
1878   namespace graph_dist = boost::graph::distributed;
1879   using boost::graph::distributed::ilu_default::process_group_type;
1880   using boost::graph::ilu_permuted;
1881 
1882   PetscTruth      row_identity, col_identity;
1883   PetscContainer  c;
1884   PetscInt        m, n, M, N;
1885   PetscErrorCode  ierr;
1886 
1887   PetscFunctionBegin;
1888   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1889   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1890   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1891   if (!row_identity || !col_identity) {
1892     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1893   }
1894 
1895   process_group_type pg;
1896   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1897   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1898   lgraph_type&   level_graph = *lgraph_p;
1899   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1900 
1901   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1902   ilu_permuted(level_graph);
1903 
1904   /* put together the new matrix */
1905   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1906   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1907   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1908   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1909   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1910   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1911   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1912 
1913   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1914   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1915   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1921 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1922 {
1923   PetscFunctionBegin;
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatSolve_MPIAIJ"
1929 /*
1930   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1931 */
1932 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1933 {
1934   namespace graph_dist = boost::graph::distributed;
1935 
1936   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1937   lgraph_type*   lgraph_p;
1938   PetscContainer c;
1939   PetscErrorCode ierr;
1940 
1941   PetscFunctionBegin;
1942   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1943   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1944   ierr = VecCopy(b, x);CHKERRQ(ierr);
1945 
1946   PetscScalar* array_x;
1947   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1948   PetscInt sx;
1949   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1950 
1951   PetscScalar* array_b;
1952   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1953   PetscInt sb;
1954   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1955 
1956   lgraph_type&   level_graph = *lgraph_p;
1957   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1958 
1959   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1960   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1961                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1962 
1963   typedef boost::iterator_property_map<array_ref_type::iterator,
1964                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1965   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1966                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1967 
1968   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1969 
1970   PetscFunctionReturn(0);
1971 }
1972 #endif
1973 
1974 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1975   PetscInt       nzlocal,nsends,nrecvs;
1976   PetscMPIInt    *send_rank;
1977   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1978   PetscScalar    *sbuf_a,**rbuf_a;
1979   PetscErrorCode (*MatDestroy)(Mat);
1980 } Mat_Redundant;
1981 
1982 #undef __FUNCT__
1983 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1984 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1985 {
1986   PetscErrorCode       ierr;
1987   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1988   PetscInt             i;
1989 
1990   PetscFunctionBegin;
1991   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1992   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1993   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1994   for (i=0; i<redund->nrecvs; i++){
1995     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1996     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1997   }
1998   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1999   ierr = PetscFree(redund);CHKERRQ(ierr);
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatDestroy_MatRedundant"
2005 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2006 {
2007   PetscErrorCode  ierr;
2008   PetscContainer  container;
2009   Mat_Redundant   *redund=PETSC_NULL;
2010 
2011   PetscFunctionBegin;
2012   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2013   if (container) {
2014     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2015   } else {
2016     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2017   }
2018   A->ops->destroy = redund->MatDestroy;
2019   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2020   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2021   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
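/*
    Builds a copy of the entire matrix on each subcommunicator.  Every process packs
    its local rows (row pointers, global column indices and values) into send buffers
    and exchanges them with the corresponding processes of the other subcommunicators;
    local and received rows are then inserted into C with MatSetValues().  For
    MAT_INITIAL_MATRIX the communication pattern and the buffers are kept in a
    Mat_Redundant container attached to C, so a MAT_REUSE_MATRIX call only has to
    resend the numerical values.
*/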
2027 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2028 {
2029   PetscMPIInt    rank,size;
2030   MPI_Comm       comm=((PetscObject)mat)->comm;
2031   PetscErrorCode ierr;
2032   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2033   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2034   PetscInt       *rowrange=mat->rmap->range;
2035   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2036   Mat            A=aij->A,B=aij->B,C=*matredundant;
2037   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2038   PetscScalar    *sbuf_a;
2039   PetscInt       nzlocal=a->nz+b->nz;
2040   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2041   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2042   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2043   MatScalar      *aworkA,*aworkB;
2044   PetscScalar    *vals;
2045   PetscMPIInt    tag1,tag2,tag3,imdex;
2046   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2047                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2048   MPI_Status     recv_status,*send_status;
2049   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2050   PetscInt       **rbuf_j=PETSC_NULL;
2051   PetscScalar    **rbuf_a=PETSC_NULL;
2052   Mat_Redundant  *redund=PETSC_NULL;
2053   PetscContainer container;
2054 
2055   PetscFunctionBegin;
2056   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2057   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2058 
2059   if (reuse == MAT_REUSE_MATRIX) {
2060     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2061     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2062     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2063     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2064     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2065     if (container) {
2066       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2067     } else {
2068       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2069     }
2070     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2071 
2072     nsends    = redund->nsends;
2073     nrecvs    = redund->nrecvs;
2074     send_rank = redund->send_rank; recv_rank = send_rank + size;
2075     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2076     sbuf_j    = redund->sbuf_j;
2077     sbuf_a    = redund->sbuf_a;
2078     rbuf_j    = redund->rbuf_j;
2079     rbuf_a    = redund->rbuf_a;
2080   }
2081 
2082   if (reuse == MAT_INITIAL_MATRIX){
2083     PetscMPIInt  subrank,subsize;
2084     PetscInt     nleftover,np_subcomm;
2085     /* get the destination processors' id send_rank, nsends and nrecvs */
2086     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2087     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2088     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2089     recv_rank = send_rank + size;
2090     np_subcomm = size/nsubcomm;
2091     nleftover  = size - nsubcomm*np_subcomm;
2092     nsends = 0; nrecvs = 0;
2093     for (i=0; i<size; i++){ /* i=rank*/
2094       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2095         send_rank[nsends] = i; nsends++;
2096         recv_rank[nrecvs++] = i;
2097       }
2098     }
2099     if (rank >= size - nleftover){/* this proc is a leftover processor */
2100       i = size-nleftover-1;
2101       j = 0;
2102       while (j < nsubcomm - nleftover){
2103         send_rank[nsends++] = i;
2104         i--; j++;
2105       }
2106     }
2107 
2108     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2109       for (i=0; i<nleftover; i++){
2110         recv_rank[nrecvs++] = size-nleftover+i;
2111       }
2112     }
2113 
2114     /* allocate sbuf_j, sbuf_a */
2115     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2116     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2117     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2118   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2119 
2120   /* copy mat's local entries into the buffers */
2121   if (reuse == MAT_INITIAL_MATRIX){
2122     rownz_max = 0;
2123     rptr = sbuf_j;
2124     cols = sbuf_j + rend-rstart + 1;
2125     vals = sbuf_a;
2126     rptr[0] = 0;
2127     for (i=0; i<rend-rstart; i++){
2128       row = i + rstart;
2129       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2130       ncols  = nzA + nzB;
2131       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2132       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2133       /* load the column indices for this row into cols */
2134       lwrite = 0;
2135       for (l=0; l<nzB; l++) {
2136         if ((ctmp = bmap[cworkB[l]]) < cstart){
2137           vals[lwrite]   = aworkB[l];
2138           cols[lwrite++] = ctmp;
2139         }
2140       }
2141       for (l=0; l<nzA; l++){
2142         vals[lwrite]   = aworkA[l];
2143         cols[lwrite++] = cstart + cworkA[l];
2144       }
2145       for (l=0; l<nzB; l++) {
2146         if ((ctmp = bmap[cworkB[l]]) >= cend){
2147           vals[lwrite]   = aworkB[l];
2148           cols[lwrite++] = ctmp;
2149         }
2150       }
2151       vals += ncols;
2152       cols += ncols;
2153       rptr[i+1] = rptr[i] + ncols;
2154       if (rownz_max < ncols) rownz_max = ncols;
2155     }
2156     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2157   } else { /* only copy matrix values into sbuf_a */
2158     rptr = sbuf_j;
2159     vals = sbuf_a;
2160     rptr[0] = 0;
2161     for (i=0; i<rend-rstart; i++){
2162       row = i + rstart;
2163       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2164       ncols  = nzA + nzB;
2165       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2166       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2167       lwrite = 0;
2168       for (l=0; l<nzB; l++) {
2169         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2170       }
2171       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2172       for (l=0; l<nzB; l++) {
2173         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2174       }
2175       vals += ncols;
2176       rptr[i+1] = rptr[i] + ncols;
2177     }
2178   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2179 
2180   /* send nzlocal to others, and recv the others' nzlocal */
2181   /*--------------------------------------------------*/
2182   if (reuse == MAT_INITIAL_MATRIX){
2183     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2184     s_waits2 = s_waits3 + nsends;
2185     s_waits1 = s_waits2 + nsends;
2186     r_waits1 = s_waits1 + nsends;
2187     r_waits2 = r_waits1 + nrecvs;
2188     r_waits3 = r_waits2 + nrecvs;
2189   } else {
2190     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2191     r_waits3 = s_waits3 + nsends;
2192   }
2193 
2194   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2195   if (reuse == MAT_INITIAL_MATRIX){
2196     /* get new tags to keep the communication clean */
2197     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2198     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2199     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2200     rbuf_nz = sbuf_nz + nsends;
2201 
2202     /* post receives of other's nzlocal */
2203     for (i=0; i<nrecvs; i++){
2204       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2205     }
2206     /* send nzlocal to others */
2207     for (i=0; i<nsends; i++){
2208       sbuf_nz[i] = nzlocal;
2209       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2210     }
2211     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2212     count = nrecvs;
2213     while (count) {
2214       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2215       recv_rank[imdex] = recv_status.MPI_SOURCE;
2216       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2217       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2218 
2219       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2220       rbuf_nz[imdex] += i + 2;
2221       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2222       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2223       count--;
2224     }
2225     /* wait on sends of nzlocal */
2226     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2227     /* send mat->i,j to others, and recv theirs */
2228     /*------------------------------------------------*/
2229     for (i=0; i<nsends; i++){
2230       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2231       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2232     }
2233     /* wait on receives of mat->i,j */
2234     /*------------------------------*/
2235     count = nrecvs;
2236     while (count) {
2237       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2238       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2239       count--;
2240     }
2241     /* wait on sends of mat->i,j */
2242     /*---------------------------*/
2243     if (nsends) {
2244       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2245     }
2246   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2247 
2248   /* post receives, send and receive mat->a */
2249   /*----------------------------------------*/
2250   for (imdex=0; imdex<nrecvs; imdex++) {
2251     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2252   }
2253   for (i=0; i<nsends; i++){
2254     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2255   }
2256   count = nrecvs;
2257   while (count) {
2258     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2259     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2260     count--;
2261   }
2262   if (nsends) {
2263     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2264   }
2265 
2266   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2267 
2268   /* create redundant matrix */
2269   /*-------------------------*/
2270   if (reuse == MAT_INITIAL_MATRIX){
2271     /* compute rownz_max for preallocation */
2272     for (imdex=0; imdex<nrecvs; imdex++){
2273       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2274       rptr = rbuf_j[imdex];
2275       for (i=0; i<j; i++){
2276         ncols = rptr[i+1] - rptr[i];
2277         if (rownz_max < ncols) rownz_max = ncols;
2278       }
2279     }
2280 
2281     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2282     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2283     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2284     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2285     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2286   } else {
2287     C = *matredundant;
2288   }
2289 
2290   /* insert local matrix entries */
2291   rptr = sbuf_j;
2292   cols = sbuf_j + rend-rstart + 1;
2293   vals = sbuf_a;
2294   for (i=0; i<rend-rstart; i++){
2295     row   = i + rstart;
2296     ncols = rptr[i+1] - rptr[i];
2297     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2298     vals += ncols;
2299     cols += ncols;
2300   }
2301   /* insert received matrix entries */
2302   for (imdex=0; imdex<nrecvs; imdex++){
2303     rstart = rowrange[recv_rank[imdex]];
2304     rend   = rowrange[recv_rank[imdex]+1];
2305     rptr = rbuf_j[imdex];
2306     cols = rbuf_j[imdex] + rend-rstart + 1;
2307     vals = rbuf_a[imdex];
2308     for (i=0; i<rend-rstart; i++){
2309       row   = i + rstart;
2310       ncols = rptr[i+1] - rptr[i];
2311       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2312       vals += ncols;
2313       cols += ncols;
2314     }
2315   }
2316   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2317   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2318   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2319   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2320   if (reuse == MAT_INITIAL_MATRIX){
2321     PetscContainer container;
2322     *matredundant = C;
2323     /* create a supporting struct and attach it to C for reuse */
2324     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2325     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2326     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2327     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2328     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2329 
2330     redund->nzlocal = nzlocal;
2331     redund->nsends  = nsends;
2332     redund->nrecvs  = nrecvs;
2333     redund->send_rank = send_rank;
2334     redund->sbuf_nz = sbuf_nz;
2335     redund->sbuf_j  = sbuf_j;
2336     redund->sbuf_a  = sbuf_a;
2337     redund->rbuf_j  = rbuf_j;
2338     redund->rbuf_a  = rbuf_a;
2339 
2340     redund->MatDestroy = C->ops->destroy;
2341     C->ops->destroy    = MatDestroy_MatRedundant;
2342   }
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
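/*
    Largest entry in absolute value of each local row: computed first for the diagonal
    block, then compared against the off-diagonal block; the optional index array is
    translated to global column numbering (cmap->rstart for A, garray for B).
*/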
2348 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2349 {
2350   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2351   PetscErrorCode ierr;
2352   PetscInt       i,*idxb = 0;
2353   PetscScalar    *va,*vb;
2354   Vec            vtmp;
2355 
2356   PetscFunctionBegin;
2357   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2358   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2359   if (idx) {
2360     for (i=0; i<A->rmap->n; i++) {
2361       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2362     }
2363   }
2364 
2365   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2366   if (idx) {
2367     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2368   }
2369   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2370   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2371 
2372   for (i=0; i<A->rmap->n; i++){
2373     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2374       va[i] = vb[i];
2375       if (idx) idx[i] = a->garray[idxb[i]];
2376     }
2377   }
2378 
2379   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2380   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2381   if (idxb) {
2382     ierr = PetscFree(idxb);CHKERRQ(ierr);
2383   }
2384   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2390 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2391 {
2392   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2393   PetscErrorCode ierr;
2394   PetscInt       i,*idxb = 0;
2395   PetscScalar    *va,*vb;
2396   Vec            vtmp;
2397 
2398   PetscFunctionBegin;
2399   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2400   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2401   if (idx) {
2402     for (i=0; i<A->rmap->n; i++) {
2403       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2404     }
2405   }
2406 
2407   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2408   if (idx) {
2409     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2410   }
2411   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2412   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2413 
2414   for (i=0; i<A->rmap->n; i++){
2415     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2416       va[i] = vb[i];
2417       if (idx) idx[i] = a->garray[idxb[i]];
2418     }
2419   }
2420 
2421   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2422   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2423   if (idxb) {
2424     ierr = PetscFree(idxb);CHKERRQ(ierr);
2425   }
2426   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
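/*
    The per-row minima of the diagonal and off-diagonal blocks are computed separately
    and merged row by row; the chosen value is returned together with its global
    column index (cstart + local index for A, garray lookup for B).
*/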
2432 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2433 {
2434   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2435   PetscInt       n      = A->rmap->n;
2436   PetscInt       cstart = A->cmap->rstart;
2437   PetscInt      *cmap   = mat->garray;
2438   PetscInt      *diagIdx, *offdiagIdx;
2439   Vec            diagV, offdiagV;
2440   PetscScalar   *a, *diagA, *offdiagA;
2441   PetscInt       r;
2442   PetscErrorCode ierr;
2443 
2444   PetscFunctionBegin;
2445   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2446   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2447   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2448   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2449   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2450   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2451   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2452   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2453   for(r = 0; r < n; ++r) {
2454     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2455       a[r]   = diagA[r];
2456       idx[r] = cstart + diagIdx[r];
2457     } else {
2458       a[r]   = offdiagA[r];
2459       idx[r] = cmap[offdiagIdx[r]];
2460     }
2461   }
2462   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2463   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2464   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2465   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2466   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2467   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2473 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2474 {
2475   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2476   PetscInt       n      = A->rmap->n;
2477   PetscInt       cstart = A->cmap->rstart;
2478   PetscInt      *cmap   = mat->garray;
2479   PetscInt      *diagIdx, *offdiagIdx;
2480   Vec            diagV, offdiagV;
2481   PetscScalar   *a, *diagA, *offdiagA;
2482   PetscInt       r;
2483   PetscErrorCode ierr;
2484 
2485   PetscFunctionBegin;
2486   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2487   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2488   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2489   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2490   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2491   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2492   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2493   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2494   for(r = 0; r < n; ++r) {
2495     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2496       a[r]   = diagA[r];
2497       idx[r] = cstart + diagIdx[r];
2498     } else {
2499       a[r]   = offdiagA[r];
2500       idx[r] = cmap[offdiagIdx[r]];
2501     }
2502   }
2503   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2504   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2505   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2506   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2507   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2508   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 #undef __FUNCT__
2513 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2514 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2515 {
2516   PetscErrorCode ierr;
2517 
2518   PetscFunctionBegin;
2519   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 /* -------------------------------------------------------------------*/
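/*
    Function table for the MATMPIAIJ implementation; the numbered comments give the
    slot index in struct _MatOps.  A zero entry means the operation is not provided
    by this type.
*/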
2524 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2525        MatGetRow_MPIAIJ,
2526        MatRestoreRow_MPIAIJ,
2527        MatMult_MPIAIJ,
2528 /* 4*/ MatMultAdd_MPIAIJ,
2529        MatMultTranspose_MPIAIJ,
2530        MatMultTransposeAdd_MPIAIJ,
2531 #ifdef PETSC_HAVE_PBGL
2532        MatSolve_MPIAIJ,
2533 #else
2534        0,
2535 #endif
2536        0,
2537        0,
2538 /*10*/ 0,
2539        0,
2540        0,
2541        MatRelax_MPIAIJ,
2542        MatTranspose_MPIAIJ,
2543 /*15*/ MatGetInfo_MPIAIJ,
2544        MatEqual_MPIAIJ,
2545        MatGetDiagonal_MPIAIJ,
2546        MatDiagonalScale_MPIAIJ,
2547        MatNorm_MPIAIJ,
2548 /*20*/ MatAssemblyBegin_MPIAIJ,
2549        MatAssemblyEnd_MPIAIJ,
2550        MatSetOption_MPIAIJ,
2551        MatZeroEntries_MPIAIJ,
2552 /*24*/ MatZeroRows_MPIAIJ,
2553        0,
2554 #ifdef PETSC_HAVE_PBGL
2555        0,
2556 #else
2557        0,
2558 #endif
2559        0,
2560        0,
2561 /*29*/ MatSetUpPreallocation_MPIAIJ,
2562 #ifdef PETSC_HAVE_PBGL
2563        0,
2564 #else
2565        0,
2566 #endif
2567        0,
2568        0,
2569        0,
2570 /*34*/ MatDuplicate_MPIAIJ,
2571        0,
2572        0,
2573        0,
2574        0,
2575 /*39*/ MatAXPY_MPIAIJ,
2576        MatGetSubMatrices_MPIAIJ,
2577        MatIncreaseOverlap_MPIAIJ,
2578        MatGetValues_MPIAIJ,
2579        MatCopy_MPIAIJ,
2580 /*44*/ MatGetRowMax_MPIAIJ,
2581        MatScale_MPIAIJ,
2582        0,
2583        0,
2584        0,
2585 /*49*/ MatSetBlockSize_MPIAIJ,
2586        0,
2587        0,
2588        0,
2589        0,
2590 /*54*/ MatFDColoringCreate_MPIAIJ,
2591        0,
2592        MatSetUnfactored_MPIAIJ,
2593        MatPermute_MPIAIJ,
2594        0,
2595 /*59*/ MatGetSubMatrix_MPIAIJ,
2596        MatDestroy_MPIAIJ,
2597        MatView_MPIAIJ,
2598        0,
2599        0,
2600 /*64*/ 0,
2601        0,
2602        0,
2603        0,
2604        0,
2605 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2606        MatGetRowMinAbs_MPIAIJ,
2607        0,
2608        MatSetColoring_MPIAIJ,
2609 #if defined(PETSC_HAVE_ADIC)
2610        MatSetValuesAdic_MPIAIJ,
2611 #else
2612        0,
2613 #endif
2614        MatSetValuesAdifor_MPIAIJ,
2615 /*75*/ 0,
2616        0,
2617        0,
2618        0,
2619        0,
2620 /*80*/ 0,
2621        0,
2622        0,
2623 /*83*/ MatLoad_MPIAIJ,
2624        0,
2625        0,
2626        0,
2627        0,
2628        0,
2629 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2630        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2631        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2632        MatPtAP_Basic,
2633        MatPtAPSymbolic_MPIAIJ,
2634 /*94*/ MatPtAPNumeric_MPIAIJ,
2635        0,
2636        0,
2637        0,
2638        0,
2639 /*99*/ 0,
2640        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2641        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2642        MatConjugate_MPIAIJ,
2643        0,
2644 /*104*/MatSetValuesRow_MPIAIJ,
2645        MatRealPart_MPIAIJ,
2646        MatImaginaryPart_MPIAIJ,
2647        0,
2648        0,
2649 /*109*/0,
2650        MatGetRedundantMatrix_MPIAIJ,
2651        MatGetRowMin_MPIAIJ,
2652        0,
2653        0,
2654 /*114*/MatGetSeqNonzerostructure_MPIAIJ};
2655 
2656 /* ----------------------------------------------------------------------------------------*/
2657 
2658 EXTERN_C_BEGIN
2659 #undef __FUNCT__
2660 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2661 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2662 {
2663   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2664   PetscErrorCode ierr;
2665 
2666   PetscFunctionBegin;
2667   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2668   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2669   PetscFunctionReturn(0);
2670 }
2671 EXTERN_C_END
2672 
2673 EXTERN_C_BEGIN
2674 #undef __FUNCT__
2675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2676 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2677 {
2678   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2679   PetscErrorCode ierr;
2680 
2681   PetscFunctionBegin;
2682   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2683   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2684   PetscFunctionReturn(0);
2685 }
2686 EXTERN_C_END
2687 
2688 #include "petscpc.h"
2689 EXTERN_C_BEGIN
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
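/*
    Preallocates the two sequential blocks of an MPIAIJ matrix: b->A holds the
    "diagonal" part (columns owned by this process) and b->B the "off-diagonal" part
    (all other columns, stored with compressed column indices).  A typical calling
    sequence is sketched below; the row counts 5 and 2 are only illustrative.

       MatCreate(comm,&M);
       MatSetSizes(M,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
       MatSetType(M,MATMPIAIJ);
       MatMPIAIJSetPreallocation(M,5,PETSC_NULL,2,PETSC_NULL);
*/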
2692 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2693 {
2694   Mat_MPIAIJ     *b;
2695   PetscErrorCode ierr;
2696   PetscInt       i;
2697 
2698   PetscFunctionBegin;
2699   B->preallocated = PETSC_TRUE;
2700   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2701   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2702   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2703   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2704 
2705   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2706   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2707   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2708   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2709   if (d_nnz) {
2710     for (i=0; i<B->rmap->n; i++) {
2711       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2712     }
2713   }
2714   if (o_nnz) {
2715     for (i=0; i<B->rmap->n; i++) {
2716       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2717     }
2718   }
2719   b = (Mat_MPIAIJ*)B->data;
2720 
2721   /* Explicitly create 2 MATSEQAIJ matrices. */
2722   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2723   ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2724   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2725   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2726   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2727   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2728   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2729   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2730 
2731   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2732   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2733 
2734   PetscFunctionReturn(0);
2735 }
2736 EXTERN_C_END
2737 
2738 #undef __FUNCT__
2739 #define __FUNCT__ "MatDuplicate_MPIAIJ"
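/*
    Creates a new MPIAIJ matrix with the same layout as matin: the row and column maps,
    colmap, garray, the ghost vector lvec and the scatter Mvctx are copied, and the two
    sequential blocks are duplicated with or without their values depending on cpvalues.
*/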
2740 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2741 {
2742   Mat            mat;
2743   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   *newmat       = 0;
2748   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2749   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2750   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2751   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2752   a    = (Mat_MPIAIJ*)mat->data;
2753 
2754   mat->factor       = matin->factor;
2755   mat->rmap->bs      = matin->rmap->bs;
2756   mat->assembled    = PETSC_TRUE;
2757   mat->insertmode   = NOT_SET_VALUES;
2758   mat->preallocated = PETSC_TRUE;
2759 
2760   a->size           = oldmat->size;
2761   a->rank           = oldmat->rank;
2762   a->donotstash     = oldmat->donotstash;
2763   a->roworiented    = oldmat->roworiented;
2764   a->rowindices     = 0;
2765   a->rowvalues      = 0;
2766   a->getrowactive   = PETSC_FALSE;
2767 
2768   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2769   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2770 
2771   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2772   if (oldmat->colmap) {
2773 #if defined (PETSC_USE_CTABLE)
2774     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2775 #else
2776     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2777     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2778     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2779 #endif
2780   } else a->colmap = 0;
2781   if (oldmat->garray) {
2782     PetscInt len;
2783     len  = oldmat->B->cmap->n;
2784     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2785     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2786     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2787   } else a->garray = 0;
2788 
2789   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2790   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2791   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2792   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2793   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2794   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2795   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2796   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2797   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2798   *newmat = mat;
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 #include "petscsys.h"
2803 
2804 #undef __FUNCT__
2805 #define __FUNCT__ "MatLoad_MPIAIJ"
2806 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2807 {
2808   Mat            A;
2809   PetscScalar    *vals,*svals;
2810   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2811   MPI_Status     status;
2812   PetscErrorCode ierr;
2813   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2814   PetscInt       i,nz,j,rstart,rend,mmax,maxnz;
2815   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2816   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2817   PetscInt       cend,cstart,n,*rowners;
2818   int            fd;
2819 
2820   PetscFunctionBegin;
2821   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2822   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2823   if (!rank) {
2824     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2825     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2826     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2827   }
2828 
2829   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2830   M = header[1]; N = header[2];
2831   /* determine ownership of all rows */
2832   m    = M/size + ((M % size) > rank);
2833   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2834   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2835 
2836   /* First process needs enough room for process with most rows */
2837   if (!rank) {
2838     mmax       = rowners[1];
2839     for (i=2; i<size; i++) {
2840       mmax = PetscMax(mmax,rowners[i]);
2841     }
2842   } else mmax = m;
2843 
2844   rowners[0] = 0;
2845   for (i=2; i<=size; i++) {
2846     rowners[i] += rowners[i-1];
2847   }
2848   rstart = rowners[rank];
2849   rend   = rowners[rank+1];
2850 
2851   /* distribute row lengths to all processors */
2852   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2853   if (!rank) {
2854     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2855     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2856     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2857     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2858     for (j=0; j<m; j++) {
2859       procsnz[0] += ourlens[j];
2860     }
2861     for (i=1; i<size; i++) {
2862       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2863       /* calculate the number of nonzeros on each processor */
2864       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2865         procsnz[i] += rowlengths[j];
2866       }
2867       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2868       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2869     }
2870     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2871   } else {
2872     mpicnt = PetscMPIIntCast(m);
2873     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2874   }
2875 
2876   if (!rank) {
2877     /* determine max buffer needed and allocate it */
2878     maxnz = 0;
2879     for (i=0; i<size; i++) {
2880       maxnz = PetscMax(maxnz,procsnz[i]);
2881     }
2882     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2883 
2884     /* read in my part of the matrix column indices  */
2885     nz   = procsnz[0];
2886     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2887     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2888 
2889     /* read in everyone else's part and ship it off */
2890     for (i=1; i<size; i++) {
2891       nz     = procsnz[i];
2892       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2893       mpicnt = PetscMPIIntCast(nz);
2894       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2895     }
2896     ierr = PetscFree(cols);CHKERRQ(ierr);
2897   } else {
2898     /* determine buffer space needed for message */
2899     nz = 0;
2900     for (i=0; i<m; i++) {
2901       nz += ourlens[i];
2902     }
2903     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2904 
2905     /* receive message of column indices*/
2906     mpicnt = PetscMPIIntCast(nz);
2907     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2908     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2909     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2910     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2911     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2912   }
2913 
2914   /* determine column ownership if matrix is not square */
2915   if (N != M) {
2916     n      = N/size + ((N % size) > rank);
2917     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2918     cstart = cend - n;
2919   } else {
2920     cstart = rstart;
2921     cend   = rend;
2922     n      = cend - cstart;
2923   }
2924 
2925   /* loop over local rows, determining number of off diagonal entries */
2926   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2927   jj = 0;
2928   for (i=0; i<m; i++) {
2929     for (j=0; j<ourlens[i]; j++) {
2930       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2931       jj++;
2932     }
2933   }
2934 
2935   /* create our matrix */
2936   for (i=0; i<m; i++) {
2937     ourlens[i] -= offlens[i];
2938   }
2939   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2940   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2941   ierr = MatSetType(A,type);CHKERRQ(ierr);
2942   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2943 
2944   for (i=0; i<m; i++) {
2945     ourlens[i] += offlens[i];
2946   }
2947 
2948   if (!rank) {
2949     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2950 
2951     /* read in my part of the matrix numerical values  */
2952     nz   = procsnz[0];
2953     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2954 
2955     /* insert into matrix */
2956     jj      = rstart;
2957     smycols = mycols;
2958     svals   = vals;
2959     for (i=0; i<m; i++) {
2960       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2961       smycols += ourlens[i];
2962       svals   += ourlens[i];
2963       jj++;
2964     }
2965 
2966     /* read in other processors and ship out */
2967     for (i=1; i<size; i++) {
2968       nz     = procsnz[i];
2969       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2970       mpicnt = PetscMPIIntCast(nz);
2971       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2972     }
2973     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2974   } else {
2975     /* receive numeric values */
2976     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2977 
2978     /* receive message of values*/
2979     mpicnt = PetscMPIIntCast(nz);
2980     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2981     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
2982     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2983     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2984     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2985 
2986     /* insert into matrix */
2987     jj      = rstart;
2988     smycols = mycols;
2989     svals   = vals;
2990     for (i=0; i<m; i++) {
2991       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2992       smycols += ourlens[i];
2993       svals   += ourlens[i];
2994       jj++;
2995     }
2996   }
2997   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2998   ierr = PetscFree(vals);CHKERRQ(ierr);
2999   ierr = PetscFree(mycols);CHKERRQ(ierr);
3000   ierr = PetscFree(rowners);CHKERRQ(ierr);
3001 
3002   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3003   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3004   *newmat = A;
3005   PetscFunctionReturn(0);
3006 }
3007 
3008 #undef __FUNCT__
3009 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3010 /*
3011     Not great since it makes two copies of the submatrix: first a SeqAIJ
3012   locally, and then the final result by concatenating the local matrices.
3013   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3014 */
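/*
    Typical user-level call that reaches this routine for an MPIAIJ matrix (a sketch only;
    nlocal, first and the communicator are illustrative, declarations and error handling abbreviated):

       IS  isrow,iscol;
       Mat sub;

       ierr = ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&isrow);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&iscol);CHKERRQ(ierr);
       ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/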
3015 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3016 {
3017   PetscErrorCode ierr;
3018   PetscMPIInt    rank,size;
3019   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3020   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3021   Mat            *local,M,Mreuse;
3022   MatScalar      *vwork,*aa;
3023   MPI_Comm       comm = ((PetscObject)mat)->comm;
3024   Mat_SeqAIJ     *aij;
3025 
3026 
3027   PetscFunctionBegin;
3028   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3029   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3030 
3031   if (call ==  MAT_REUSE_MATRIX) {
3032     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3033     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3034     local = &Mreuse;
3035     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3036   } else {
3037     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3038     Mreuse = *local;
3039     ierr   = PetscFree(local);CHKERRQ(ierr);
3040   }
3041 
3042   /*
3043       m - number of local rows
3044       n - number of columns (same on all processors)
3045       rstart - first row in new global matrix generated
3046   */
3047   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3048   if (call == MAT_INITIAL_MATRIX) {
3049     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3050     ii  = aij->i;
3051     jj  = aij->j;
3052 
3053     /*
3054         Determine the number of non-zeros in the diagonal and off-diagonal
3055         portions of the matrix in order to do correct preallocation
3056     */
3057 
3058     /* first get start and end of "diagonal" columns */
3059     if (csize == PETSC_DECIDE) {
3060       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3061       if (mglobal == n) { /* square matrix */
3062 	nlocal = m;
3063       } else {
3064         nlocal = n/size + ((n % size) > rank);
3065       }
3066     } else {
3067       nlocal = csize;
3068     }
3069     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3070     rstart = rend - nlocal;
3071     if (rank == size - 1 && rend != n) {
3072       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3073     }
3074 
3075     /* next, compute all the lengths */
3076     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3077     olens = dlens + m;
3078     for (i=0; i<m; i++) {
3079       jend = ii[i+1] - ii[i];
3080       olen = 0;
3081       dlen = 0;
3082       for (j=0; j<jend; j++) {
3083         if (*jj < rstart || *jj >= rend) olen++;
3084         else dlen++;
3085         jj++;
3086       }
3087       olens[i] = olen;
3088       dlens[i] = dlen;
3089     }
3090     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3091     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3092     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3093     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3094     ierr = PetscFree(dlens);CHKERRQ(ierr);
3095   } else {
3096     PetscInt ml,nl;
3097 
3098     M = *newmat;
3099     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3100     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3101     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3102     /*
3103          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3104        rather than the slower MatSetValues().
3105     */
3106     M->was_assembled = PETSC_TRUE;
3107     M->assembled     = PETSC_FALSE;
3108   }
3109   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3110   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3111   ii  = aij->i;
3112   jj  = aij->j;
3113   aa  = aij->a;
3114   for (i=0; i<m; i++) {
3115     row   = rstart + i;
3116     nz    = ii[i+1] - ii[i];
3117     cwork = jj;     jj += nz;
3118     vwork = aa;     aa += nz;
3119     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3120   }
3121 
3122   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3123   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3124   *newmat = M;
3125 
3126   /* save submatrix used in processor for next request */
3127   if (call ==  MAT_INITIAL_MATRIX) {
3128     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3129     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3130   }
3131 
3132   PetscFunctionReturn(0);
3133 }
3134 
3135 EXTERN_C_BEGIN
3136 #undef __FUNCT__
3137 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3138 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3139 {
3140   PetscInt       m,cstart, cend,j,nnz,i,d;
3141   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3142   const PetscInt *JJ;
3143   PetscScalar    *values;
3144   PetscErrorCode ierr;
3145 
3146   PetscFunctionBegin;
3147   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3148 
3149   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3150   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3151   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3152   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3153   m      = B->rmap->n;
3154   cstart = B->cmap->rstart;
3155   cend   = B->cmap->rend;
3156   rstart = B->rmap->rstart;
3157 
3158   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3159   o_nnz = d_nnz + m;
3160 
3161 #if defined(PETSC_USE_DEBUG)
3162   for (i=0; i<m; i++) {
3163     nnz     = Ii[i+1]- Ii[i];
3164     JJ      = J + Ii[i];
3165     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3166     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3167     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3168     for (j=1; j<nnz; j++) {
3169       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has an unsorted column index at location %D in its column indices",i,j);
3170     }
3171   }
3172 #endif
3173 
3174   for (i=0; i<m; i++) {
3175     nnz     = Ii[i+1]- Ii[i];
3176     JJ      = J + Ii[i];
3177     nnz_max = PetscMax(nnz_max,nnz);
3178     for (j=0; j<nnz; j++) {
3179       if (*JJ >= cstart) break;
3180       JJ++;
3181     }
3182     d = 0;
3183     for (; j<nnz; j++) {
3184       if (*JJ++ >= cend) break;
3185       d++;
3186     }
3187     d_nnz[i] = d;
3188     o_nnz[i] = nnz - d;
3189   }
3190   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3191   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3192 
3193   if (v) values = (PetscScalar*)v;
3194   else {
3195     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3196     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3197   }
3198 
3199   for (i=0; i<m; i++) {
3200     ii   = i + rstart;
3201     nnz  = Ii[i+1]- Ii[i];
3202     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3203   }
3204   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3205   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3206 
3207   if (!v) {
3208     ierr = PetscFree(values);CHKERRQ(ierr);
3209   }
3210   PetscFunctionReturn(0);
3211 }
3212 EXTERN_C_END
3213 
3214 #undef __FUNCT__
3215 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3216 /*@
3217    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3218    (the default parallel PETSc format).
3219 
3220    Collective on MPI_Comm
3221 
3222    Input Parameters:
3223 +  B - the matrix
3224 .  i - the indices into j for the start of each local row (starts with zero)
3225 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3226 -  v - optional values in the matrix
3227 
3228    Level: developer
3229 
3230    Notes:
3231        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3232      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3233      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3234 
3235        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3236 
3237        The format used for the sparse matrix input is equivalent to a
3238     row-major ordering, i.e. for the following matrix, the input data expected is
3239     as shown:
3240 
3241         1 0 0
3242         2 0 3     P0
3243        -------
3244         4 5 6     P1
3245 
3246      Process0 [P0]: rows_owned=[0,1]
3247         i =  {0,1,3}  [size = nrow+1  = 2+1]
3248         j =  {0,0,2}  [size = nz = 3]
3249         v =  {1,2,3}  [size = nz = 3]
3250 
3251      Process1 [P1]: rows_owned=[2]
3252         i =  {0,3}    [size = nrow+1  = 1+1]
3253         j =  {0,1,2}  [size = nz = 3]
3254         v =  {4,5,6}  [size = nz = 3]
3255 
3256       The column indices for each row MUST be sorted.
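
       A minimal sketch of the corresponding call on process 0 (B is assumed to already
    have been given type MPIAIJ and its local sizes; error handling abbreviated):

.vb
      PetscInt    i[] = {0,1,3};       /* row offsets for the 2 local rows */
      PetscInt    j[] = {0,0,2};       /* column indices, sorted within each row */
      PetscScalar v[] = {1.0,2.0,3.0}; /* corresponding values */

      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve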
3257 
3258 .keywords: matrix, aij, compressed row, sparse, parallel
3259 
3260 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3261           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3262 @*/
3263 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3264 {
3265   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3266 
3267   PetscFunctionBegin;
3268   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3269   if (f) {
3270     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3271   }
3272   PetscFunctionReturn(0);
3273 }
3274 
3275 #undef __FUNCT__
3276 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3277 /*@C
3278    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3279    (the default parallel PETSc format).  For good matrix assembly performance
3280    the user should preallocate the matrix storage by setting the parameters
3281    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3282    performance can be increased by more than a factor of 50.
3283 
3284    Collective on MPI_Comm
3285 
3286    Input Parameters:
3287 +  A - the matrix
3288 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3289            (same value is used for all local rows)
3290 .  d_nnz - array containing the number of nonzeros in the various rows of the
3291            DIAGONAL portion of the local submatrix (possibly different for each row)
3292            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3293            The size of this array is equal to the number of local rows, i.e 'm'.
3294            You must leave room for the diagonal entry even if it is zero.
3295 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3296            submatrix (same value is used for all local rows).
3297 -  o_nnz - array containing the number of nonzeros in the various rows of the
3298            OFF-DIAGONAL portion of the local submatrix (possibly different for
3299            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3300            structure. The size of this array is equal to the number
3301            of local rows, i.e 'm'.
3302 
3303    If the *_nnz parameter is given then the *_nz parameter is ignored
3304 
3305    The AIJ format (also called the Yale sparse matrix format or
3306    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3307    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3308 
3309    The parallel matrix is partitioned such that the first m0 rows belong to
3310    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3311    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3312 
3313    The DIAGONAL portion of the local submatrix of a processor can be defined
3314    as the submatrix which is obtained by extracting the part corresponding
3315    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3316    first row that belongs to the processor, and r2 is the last row belonging
3317    to this processor. This is a square mxm matrix. The remaining portion
3318    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3319 
3320    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3321 
3322    You can call MatGetInfo() to get information on how effective the preallocation was;
3323    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3324    You can also run with the option -info and look for messages with the string
3325    malloc in them to see if additional memory allocation was needed.
3326 
3327    Example usage:
3328 
3329    Consider the following 8x8 matrix with 34 non-zero values, that is
3330    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3331    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3332    as follows:
3333 
3334 .vb
3335             1  2  0  |  0  3  0  |  0  4
3336     Proc0   0  5  6  |  7  0  0  |  8  0
3337             9  0 10  | 11  0  0  | 12  0
3338     -------------------------------------
3339            13  0 14  | 15 16 17  |  0  0
3340     Proc1   0 18  0  | 19 20 21  |  0  0
3341             0  0  0  | 22 23  0  | 24  0
3342     -------------------------------------
3343     Proc2  25 26 27  |  0  0 28  | 29  0
3344            30  0  0  | 31 32 33  |  0 34
3345 .ve
3346 
3347    This can be represented as a collection of submatrices as:
3348 
3349 .vb
3350       A B C
3351       D E F
3352       G H I
3353 .ve
3354 
3355    Where the submatrices A,B,C are owned by proc0, D,E,F are
3356    owned by proc1, G,H,I are owned by proc2.
3357 
3358    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3359    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3360    The 'M','N' parameters are 8,8, and have the same values on all procs.
3361 
3362    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3363    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3364    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3365    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3366    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3367    matrix, and [DF] as another SeqAIJ matrix.
3368 
3369    When d_nz, o_nz parameters are specified, d_nz storage elements are
3370    allocated for every row of the local diagonal submatrix, and o_nz
3371    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3372    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3373    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3374    In this case, the values of d_nz,o_nz are:
3375 .vb
3376      proc0 : dnz = 2, o_nz = 2
3377      proc1 : dnz = 3, o_nz = 2
3378      proc2 : dnz = 1, o_nz = 4
3379 .ve
3380    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3381    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3382    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3383    34 values.
3384 
3385    When d_nnz, o_nnz parameters are specified, the storage is specified
3386    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3387    In the above case the values for d_nnz,o_nnz are:
3388 .vb
3389      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3390      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3391      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3392 .ve
3393    Here the space allocated is the sum of all the above values, i.e. 34, and
3394    hence the preallocation is perfect.
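
   As a concrete sketch (the communicator and variable names are illustrative), proc1 of
   the example above could be set up with exact preallocation as follows; the other
   processes make the analogous call with their own d_nnz/o_nnz arrays:

.vb
      Mat      A;
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};   /* proc1 values from the table above */

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);     /* m=n=3 locally, M=N=8 globally */
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve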
3395 
3396    Level: intermediate
3397 
3398 .keywords: matrix, aij, compressed row, sparse, parallel
3399 
3400 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3401           MPIAIJ, MatGetInfo()
3402 @*/
3403 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3404 {
3405   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3406 
3407   PetscFunctionBegin;
3408   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3409   if (f) {
3410     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3411   }
3412   PetscFunctionReturn(0);
3413 }
3414 
3415 #undef __FUNCT__
3416 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3417 /*@
3418      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3419          CSR format the local rows.
3420 
3421    Collective on MPI_Comm
3422 
3423    Input Parameters:
3424 +  comm - MPI communicator
3425 .  m - number of local rows (Cannot be PETSC_DECIDE)
3426 .  n - This value should be the same as the local size used in creating the
3427        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3428        calculated if N is given) For square matrices n is almost always m.
3429 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3430 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3431 .   i - row indices
3432 .   j - column indices
3433 -   a - matrix values
3434 
3435    Output Parameter:
3436 .   mat - the matrix
3437 
3438    Level: intermediate
3439 
3440    Notes:
3441        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3442      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3443      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3444 
3445        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3446 
3447        The format used for the sparse matrix input is equivalent to a
3448     row-major ordering, i.e. for the following matrix, the input data expected is
3449     as shown:
3450 
3451         1 0 0
3452         2 0 3     P0
3453        -------
3454         4 5 6     P1
3455 
3456      Process0 [P0]: rows_owned=[0,1]
3457         i =  {0,1,3}  [size = nrow+1  = 2+1]
3458         j =  {0,0,2}  [size = nz = 3]
3459         v =  {1,2,3}  [size = nz = 3]
3460 
3461      Process1 [P1]: rows_owned=[2]
3462         i =  {0,3}    [size = nrow+1  = 1+1]
3463         j =  {0,1,2}  [size = nz = 3]
3464         v =  {4,5,6}  [size = nz = 3]
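
     A hedged sketch of the corresponding call on process 0 (the other process passes its
     own local arrays; the communicator and names are illustrative):

.vb
      PetscInt    i[] = {0,1,3};
      PetscInt    j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve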
3465 
3466 .keywords: matrix, aij, compressed row, sparse, parallel
3467 
3468 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3469           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3470 @*/
3471 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3472 {
3473   PetscErrorCode ierr;
3474 
3475   PetscFunctionBegin;
3476   if (i[0]) {
3477     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3478   }
3479   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3480   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3481   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3482   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3483   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3484   PetscFunctionReturn(0);
3485 }
3486 
3487 #undef __FUNCT__
3488 #define __FUNCT__ "MatCreateMPIAIJ"
3489 /*@C
3490    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3491    (the default parallel PETSc format).  For good matrix assembly performance
3492    the user should preallocate the matrix storage by setting the parameters
3493    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3494    performance can be increased by more than a factor of 50.
3495 
3496    Collective on MPI_Comm
3497 
3498    Input Parameters:
3499 +  comm - MPI communicator
3500 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3501            This value should be the same as the local size used in creating the
3502            y vector for the matrix-vector product y = Ax.
3503 .  n - This value should be the same as the local size used in creating the
3504        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3505        calculated if N is given) For square matrices n is almost always m.
3506 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3507 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3508 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3509            (same value is used for all local rows)
3510 .  d_nnz - array containing the number of nonzeros in the various rows of the
3511            DIAGONAL portion of the local submatrix (possibly different for each row)
3512            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3513            The size of this array is equal to the number of local rows, i.e 'm'.
3514            You must leave room for the diagonal entry even if it is zero.
3515 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3516            submatrix (same value is used for all local rows).
3517 -  o_nnz - array containing the number of nonzeros in the various rows of the
3518            OFF-DIAGONAL portion of the local submatrix (possibly different for
3519            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3520            structure. The size of this array is equal to the number
3521            of local rows, i.e 'm'.
3522 
3523    Output Parameter:
3524 .  A - the matrix
3525 
3526    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3527    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3528    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3529 
3530    Notes:
3531    If the *_nnz parameter is given then the *_nz parameter is ignored
3532 
3533    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3534    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3535    storage requirements for this matrix.
3536 
3537    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3538    processor then it must be used on all processors that share the object for
3539    that argument.
3540 
3541    The user MUST specify either the local or global matrix dimensions
3542    (possibly both).
3543 
3544    The parallel matrix is partitioned across processors such that the
3545    first m0 rows belong to process 0, the next m1 rows belong to
3546    process 1, the next m2 rows belong to process 2 etc.. where
3547    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3548    values corresponding to an [m x N] submatrix.
3549 
3550    The columns are logically partitioned with the n0 columns belonging
3551    to 0th partition, the next n1 columns belonging to the next
3552    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3553 
3554    The DIAGONAL portion of the local submatrix on any given processor
3555    is the submatrix corresponding to the rows and columns m,n
3556    owned by the given processor, i.e. the diagonal matrix on
3557    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3558    etc. The remaining portion of the local submatrix [m x (N-n)]
3559    constitute the OFF-DIAGONAL portion. The example below better
3560    illustrates this concept.
3561 
3562    For a square global matrix we define each processor's diagonal portion
3563    to be its local rows and the corresponding columns (a square submatrix);
3564    each processor's off-diagonal portion encompasses the remainder of the
3565    local matrix (a rectangular submatrix).
3566 
3567    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3568 
3569    When calling this routine with a single process communicator, a matrix of
3570    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3571    type of communicator, use the construction mechanism:
3572      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3573 
3574    By default, this format uses inodes (identical nodes) when possible.
3575    We search for consecutive rows with the same nonzero structure, thereby
3576    reusing matrix information to achieve increased efficiency.
3577 
3578    Options Database Keys:
3579 +  -mat_no_inode  - Do not use inodes
3580 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3581 -  -mat_aij_oneindex - Internally use indexing starting at 1
3582         rather than 0.  Note that when calling MatSetValues(),
3583         the user still MUST index entries starting at 0!
3584 
3585 
3586    Example usage:
3587 
3588    Consider the following 8x8 matrix with 34 non-zero values, that is
3589    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3590    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3591    as follows:
3592 
3593 .vb
3594             1  2  0  |  0  3  0  |  0  4
3595     Proc0   0  5  6  |  7  0  0  |  8  0
3596             9  0 10  | 11  0  0  | 12  0
3597     -------------------------------------
3598            13  0 14  | 15 16 17  |  0  0
3599     Proc1   0 18  0  | 19 20 21  |  0  0
3600             0  0  0  | 22 23  0  | 24  0
3601     -------------------------------------
3602     Proc2  25 26 27  |  0  0 28  | 29  0
3603            30  0  0  | 31 32 33  |  0 34
3604 .ve
3605 
3606    This can be represented as a collection of submatrices as:
3607 
3608 .vb
3609       A B C
3610       D E F
3611       G H I
3612 .ve
3613 
3614    Where the submatrices A,B,C are owned by proc0, D,E,F are
3615    owned by proc1, G,H,I are owned by proc2.
3616 
3617    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3618    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3619    The 'M','N' parameters are 8,8, and have the same values on all procs.
3620 
3621    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3622    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3623    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3624    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3625    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3626    matrix, and [DF] as another SeqAIJ matrix.
3627 
3628    When d_nz, o_nz parameters are specified, d_nz storage elements are
3629    allocated for every row of the local diagonal submatrix, and o_nz
3630    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3631    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3632    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3633    In this case, the values of d_nz,o_nz are:
3634 .vb
3635      proc0 : dnz = 2, o_nz = 2
3636      proc1 : dnz = 3, o_nz = 2
3637      proc2 : dnz = 1, o_nz = 4
3638 .ve
3639    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3640    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3641    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3642    34 values.
3643 
3644    When d_nnz, o_nnz parameters are specified, the storage is specified
3645    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3646    In the above case the values for d_nnz,o_nnz are:
3647 .vb
3648      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3649      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3650      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3651 .ve
3652    Here the space allocated is the sum of all the above values, i.e. 34, and
3653    hence the preallocation is perfect.
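
   For example, proc0 in the layout above could make the following call (a sketch only;
   each process passes its own d_nnz/o_nnz arrays and the communicator name is illustrative):

.vb
      Mat      A;
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 values from the table above */

      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve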
3654 
3655    Level: intermediate
3656 
3657 .keywords: matrix, aij, compressed row, sparse, parallel
3658 
3659 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3660           MPIAIJ, MatCreateMPIAIJWithArrays()
3661 @*/
3662 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3663 {
3664   PetscErrorCode ierr;
3665   PetscMPIInt    size;
3666 
3667   PetscFunctionBegin;
3668   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3669   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3670   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3671   if (size > 1) {
3672     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3673     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3674   } else {
3675     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3676     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3677   }
3678   PetscFunctionReturn(0);
3679 }
3680 
3681 #undef __FUNCT__
3682 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3683 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3684 {
3685   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3686 
3687   PetscFunctionBegin;
3688   *Ad     = a->A;
3689   *Ao     = a->B;
3690   *colmap = a->garray;
3691   PetscFunctionReturn(0);
3692 }
3693 
3694 #undef __FUNCT__
3695 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3696 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3697 {
3698   PetscErrorCode ierr;
3699   PetscInt       i;
3700   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3701 
3702   PetscFunctionBegin;
3703   if (coloring->ctype == IS_COLORING_GLOBAL) {
3704     ISColoringValue *allcolors,*colors;
3705     ISColoring      ocoloring;
3706 
3707     /* set coloring for diagonal portion */
3708     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3709 
3710     /* set coloring for off-diagonal portion */
3711     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3712     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3713     for (i=0; i<a->B->cmap->n; i++) {
3714       colors[i] = allcolors[a->garray[i]];
3715     }
3716     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3717     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3718     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3719     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3720   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3721     ISColoringValue *colors;
3722     PetscInt        *larray;
3723     ISColoring      ocoloring;
3724 
3725     /* set coloring for diagonal portion */
3726     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3727     for (i=0; i<a->A->cmap->n; i++) {
3728       larray[i] = i + A->cmap->rstart;
3729     }
3730     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3731     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3732     for (i=0; i<a->A->cmap->n; i++) {
3733       colors[i] = coloring->colors[larray[i]];
3734     }
3735     ierr = PetscFree(larray);CHKERRQ(ierr);
3736     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3737     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3738     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3739 
3740     /* set coloring for off-diagonal portion */
3741     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3742     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3743     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3744     for (i=0; i<a->B->cmap->n; i++) {
3745       colors[i] = coloring->colors[larray[i]];
3746     }
3747     ierr = PetscFree(larray);CHKERRQ(ierr);
3748     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3749     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3750     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3751   } else {
3752     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3753   }
3754 
3755   PetscFunctionReturn(0);
3756 }
3757 
3758 #if defined(PETSC_HAVE_ADIC)
3759 #undef __FUNCT__
3760 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3761 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3762 {
3763   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3764   PetscErrorCode ierr;
3765 
3766   PetscFunctionBegin;
3767   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3768   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3769   PetscFunctionReturn(0);
3770 }
3771 #endif
3772 
3773 #undef __FUNCT__
3774 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3775 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3776 {
3777   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3778   PetscErrorCode ierr;
3779 
3780   PetscFunctionBegin;
3781   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3782   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3783   PetscFunctionReturn(0);
3784 }
3785 
3786 #undef __FUNCT__
3787 #define __FUNCT__ "MatMerge"
3788 /*@
3789       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3790                  matrices from each processor
3791 
3792     Collective on MPI_Comm
3793 
3794    Input Parameters:
3795 +    comm - the communicator the parallel matrix will live on
3796 .    inmat - the input sequential matrix on each process
3797 .    n - number of local columns (or PETSC_DECIDE)
3798 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3799 
3800    Output Parameter:
3801 .    outmat - the parallel matrix generated
3802 
3803     Level: advanced
3804 
3805    Notes: The number of columns of the matrix in EACH processor MUST be the same.
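
   A rough usage sketch (the SeqAIJ assembly is abbreviated and the names are illustrative):

.vb
      Mat seq,par;
      /* ... each process assembles its own mlocal x N sequential matrix seq ... */
      ierr = MatMerge(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
      /* seq has been destroyed by MatMerge(); par holds the rows of all processes stacked */
.ve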
3806 
3807 @*/
3808 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3809 {
3810   PetscErrorCode ierr;
3811   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3812   PetscInt       *indx;
3813   PetscScalar    *values;
3814 
3815   PetscFunctionBegin;
3816   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3817   if (scall == MAT_INITIAL_MATRIX){
3818     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3819     if (n == PETSC_DECIDE){
3820       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3821     }
3822     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3823     rstart -= m;
3824 
3825     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3826     for (i=0;i<m;i++) {
3827       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3828       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3829       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3830     }
3831     /* This routine will ONLY return MPIAIJ type matrix */
3832     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3833     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3834     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3835     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3836     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3837 
3838   } else if (scall == MAT_REUSE_MATRIX){
3839     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3840   } else {
3841     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3842   }
3843 
3844   for (i=0;i<m;i++) {
3845     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3846     Ii    = i + rstart;
3847     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3848     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3849   }
3850   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3851   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3852   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3853 
3854   PetscFunctionReturn(0);
3855 }
3856 
3857 #undef __FUNCT__
3858 #define __FUNCT__ "MatFileSplit"
3859 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3860 {
3861   PetscErrorCode    ierr;
3862   PetscMPIInt       rank;
3863   PetscInt          m,N,i,rstart,nnz;
3864   size_t            len;
3865   const PetscInt    *indx;
3866   PetscViewer       out;
3867   char              *name;
3868   Mat               B;
3869   const PetscScalar *values;
3870 
3871   PetscFunctionBegin;
3872   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3873   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3874   /* Should this be the type of the diagonal block of A? */
3875   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3876   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3877   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3878   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3879   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3880   for (i=0;i<m;i++) {
3881     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3882     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3883     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3884   }
3885   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3886   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3887 
3888   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3889   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3890   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3891   sprintf(name,"%s.%d",outfile,rank);
3892   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3893   ierr = PetscFree(name);CHKERRQ(ierr);
3894   ierr = MatView(B,out);CHKERRQ(ierr);
3895   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3896   ierr = MatDestroy(B);CHKERRQ(ierr);
3897   PetscFunctionReturn(0);
3898 }
3899 
3900 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3901 #undef __FUNCT__
3902 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3903 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3904 {
3905   PetscErrorCode       ierr;
3906   Mat_Merge_SeqsToMPI  *merge;
3907   PetscContainer       container;
3908 
3909   PetscFunctionBegin;
3910   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3911   if (container) {
3912     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3913     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3914     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3915     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3916     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3917     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3918     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3919     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3920     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3921     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3922     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3923     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3924 
3925     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3926     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3927     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid if the container was found */
3928   }
3929 
3930   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3931   PetscFunctionReturn(0);
3932 }
3933 
3934 #include "../src/mat/utils/freespace.h"
3935 #include "petscbt.h"
3936 
3937 #undef __FUNCT__
3938 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3939 /*@C
3940       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3941                  matrices from each processor
3942 
3943     Collective on MPI_Comm
3944 
3945    Input Parameters:
3946 +    comm - the communicator the parallel matrix will live on
3947 .    seqmat - the input sequential matrices
3948 .    m - number of local rows (or PETSC_DECIDE)
3949 .    n - number of local columns (or PETSC_DECIDE)
3950 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3951 
3952    Output Parameter:
3953 .    mpimat - the parallel matrix generated
3954 
3955     Level: advanced
3956 
3957    Notes:
3958      The dimensions of the sequential matrix in each processor MUST be the same.
3959      The input seqmat is placed in the container "Mat_Merge_SeqsToMPI", and will be
3960      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
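
     A sketch of the symbolic/numeric calling sequence (the names comm, seqmat, m and n
     are illustrative and follow the argument lists of the two routines in this file):

.vb
      Mat mpimat;
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve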
3961 @*/
3962 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3963 {
3964   PetscErrorCode       ierr;
3965   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3966   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3967   PetscMPIInt          size,rank,taga,*len_s;
3968   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
3969   PetscInt             proc,m;
3970   PetscInt             **buf_ri,**buf_rj;
3971   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3972   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3973   MPI_Request          *s_waits,*r_waits;
3974   MPI_Status           *status;
3975   MatScalar            *aa=a->a;
3976   MatScalar            **abuf_r,*ba_i;
3977   Mat_Merge_SeqsToMPI  *merge;
3978   PetscContainer       container;
3979 
3980   PetscFunctionBegin;
3981   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3982 
3983   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3984   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3985 
3986   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3987   if (container) {
3988     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3989   }
3990   bi     = merge->bi;
3991   bj     = merge->bj;
3992   buf_ri = merge->buf_ri;
3993   buf_rj = merge->buf_rj;
3994 
3995   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3996   owners = merge->rowmap.range;
3997   len_s  = merge->len_s;
3998 
3999   /* send and recv matrix values */
4000   /*-----------------------------*/
4001   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4002   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4003 
4004   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4005   for (proc=0,k=0; proc<size; proc++){
4006     if (!len_s[proc]) continue;
4007     i = owners[proc];
4008     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4009     k++;
4010   }
4011 
4012   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4013   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4014   ierr = PetscFree(status);CHKERRQ(ierr);
4015 
4016   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4017   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4018 
4019   /* insert mat values of mpimat */
4020   /*----------------------------*/
4021   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4022   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4023   nextrow = buf_ri_k + merge->nrecv;
4024   nextai  = nextrow + merge->nrecv;
4025 
4026   for (k=0; k<merge->nrecv; k++){
4027     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4028     nrows = *(buf_ri_k[k]);
4029     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4030     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4031   }
4032 
4033   /* set values of ba */
4034   m = merge->rowmap.n;
4035   for (i=0; i<m; i++) {
4036     arow = owners[rank] + i;
4037     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4038     bnzi = bi[i+1] - bi[i];
4039     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4040 
4041     /* add local non-zero vals of this proc's seqmat into ba */
4042     anzi = ai[arow+1] - ai[arow];
4043     aj   = a->j + ai[arow];
4044     aa   = a->a + ai[arow];
4045     nextaj = 0;
4046     for (j=0; nextaj<anzi; j++){
4047       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4048         ba_i[j] += aa[nextaj++];
4049       }
4050     }
4051 
4052     /* add received vals into ba */
4053     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4054       /* i-th row */
4055       if (i == *nextrow[k]) {
4056         anzi = *(nextai[k]+1) - *nextai[k];
4057         aj   = buf_rj[k] + *(nextai[k]);
4058         aa   = abuf_r[k] + *(nextai[k]);
4059         nextaj = 0;
4060         for (j=0; nextaj<anzi; j++){
4061           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4062             ba_i[j] += aa[nextaj++];
4063           }
4064         }
4065         nextrow[k]++; nextai[k]++;
4066       }
4067     }
4068     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4069   }
4070   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4071   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4072 
4073   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4074   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4075   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4076   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4077   PetscFunctionReturn(0);
4078 }
4079 
4080 #undef __FUNCT__
4081 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4082 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4083 {
4084   PetscErrorCode       ierr;
4085   Mat                  B_mpi;
4086   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4087   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4088   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4089   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4090   PetscInt             len,proc,*dnz,*onz;
4091   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4092   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4093   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4094   MPI_Status           *status;
4095   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4096   PetscBT              lnkbt;
4097   Mat_Merge_SeqsToMPI  *merge;
4098   PetscContainer       container;
4099 
4100   PetscFunctionBegin;
4101   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4102 
4103   /* make sure it is a PETSc comm */
4104   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4105   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4106   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4107 
4108   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4109   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4110 
4111   /* determine row ownership */
4112   /*---------------------------------------------------------*/
4113   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4114   merge->rowmap.n = m;
4115   merge->rowmap.N = M;
4116   merge->rowmap.bs = 1;
4117   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4118   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4119   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4120 
4121   m      = merge->rowmap.n;
4122   M      = merge->rowmap.N;
4123   owners = merge->rowmap.range;
4124 
4125   /* determine the number of messages to send, their lengths */
4126   /*---------------------------------------------------------*/
4127   len_s  = merge->len_s;
4128 
4129   len = 0;  /* length of buf_si[] */
4130   merge->nsend = 0;
4131   for (proc=0; proc<size; proc++){
4132     len_si[proc] = 0;
4133     if (proc == rank){
4134       len_s[proc] = 0;
4135     } else {
4136       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4137       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4138     }
4139     if (len_s[proc]) {
4140       merge->nsend++;
4141       nrows = 0;
4142       for (i=owners[proc]; i<owners[proc+1]; i++){
4143         if (ai[i+1] > ai[i]) nrows++;
4144       }
4145       len_si[proc] = 2*(nrows+1);
4146       len += len_si[proc];
4147     }
4148   }
4149 
4150   /* determine the number and length of messages to receive for ij-structure */
4151   /*-------------------------------------------------------------------------*/
4152   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4153   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4154 
4155   /* post the Irecv of j-structure */
4156   /*-------------------------------*/
4157   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4158   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4159 
4160   /* post the Isend of j-structure */
4161   /*--------------------------------*/
4162   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4163   sj_waits = si_waits + merge->nsend;
4164 
4165   for (proc=0, k=0; proc<size; proc++){
4166     if (!len_s[proc]) continue;
4167     i = owners[proc];
4168     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4169     k++;
4170   }
4171 
4172   /* receives and sends of j-structure are complete */
4173   /*------------------------------------------------*/
4174   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4175   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4176 
4177   /* send and recv i-structure */
4178   /*---------------------------*/
4179   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4180   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4181 
4182   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4183   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4184   for (proc=0,k=0; proc<size; proc++){
4185     if (!len_s[proc]) continue;
4186     /* form outgoing message for i-structure:
4187          buf_si[0]:                 nrows to be sent
4188                [1:nrows]:           local row index (relative to the rows owned by the receiving process)
4189                [nrows+1:2*nrows+1]: i-structure index
4190     */
4191     /*-------------------------------------------*/
4192     nrows = len_si[proc]/2 - 1;
4193     buf_si_i    = buf_si + nrows+1;
4194     buf_si[0]   = nrows;
4195     buf_si_i[0] = 0;
4196     nrows = 0;
4197     for (i=owners[proc]; i<owners[proc+1]; i++){
4198       anzi = ai[i+1] - ai[i];
4199       if (anzi) {
4200         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4201         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4202         nrows++;
4203       }
4204     }
4205     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4206     k++;
4207     buf_si += len_si[proc];
4208   }
4209 
4210   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4211   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4212 
4213   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4214   for (i=0; i<merge->nrecv; i++){
4215     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4216   }
4217 
4218   ierr = PetscFree(len_si);CHKERRQ(ierr);
4219   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4220   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4221   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4222   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4223   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4224   ierr = PetscFree(status);CHKERRQ(ierr);
4225 
4226   /* compute a local seq matrix in each processor */
4227   /*----------------------------------------------*/
4228   /* allocate bi array and free space for accumulating nonzero column info */
4229   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4230   bi[0] = 0;
4231 
4232   /* create and initialize a linked list */
4233   nlnk = N+1;
4234   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4235 
4236   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4237   len = 0;
4238   len  = ai[owners[rank+1]] - ai[owners[rank]];
4239   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4240   current_space = free_space;
4241 
4242   /* determine symbolic info for each local row */
4243   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4244   nextrow = buf_ri_k + merge->nrecv;
4245   nextai  = nextrow + merge->nrecv;
4246   for (k=0; k<merge->nrecv; k++){
4247     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4248     nrows = *buf_ri_k[k];
4249     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4250     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4251   }
4252 
4253   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4254   len = 0;
4255   for (i=0;i<m;i++) {
4256     bnzi   = 0;
4257     /* add local non-zero cols of this proc's seqmat into lnk */
4258     arow   = owners[rank] + i;
4259     anzi   = ai[arow+1] - ai[arow];
4260     aj     = a->j + ai[arow];
4261     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4262     bnzi += nlnk;
4263     /* add received col data into lnk */
4264     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4265       if (i == *nextrow[k]) { /* i-th row */
4266         anzi = *(nextai[k]+1) - *nextai[k];
4267         aj   = buf_rj[k] + *nextai[k];
4268         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4269         bnzi += nlnk;
4270         nextrow[k]++; nextai[k]++;
4271       }
4272     }
4273     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4274 
4275     /* if free space is not available, make more free space */
4276     if (current_space->local_remaining<bnzi) {
4277       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4278       nspacedouble++;
4279     }
4280     /* copy data into free space, then initialize lnk */
4281     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4282     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4283 
4284     current_space->array           += bnzi;
4285     current_space->local_used      += bnzi;
4286     current_space->local_remaining -= bnzi;
4287 
4288     bi[i+1] = bi[i] + bnzi;
4289   }
4290 
4291   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4292 
4293   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4294   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4295   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4296 
4297   /* create symbolic parallel matrix B_mpi */
4298   /*---------------------------------------*/
4299   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4300   if (n==PETSC_DECIDE) {
4301     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4302   } else {
4303     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4304   }
4305   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4306   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4307   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4308 
4309   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4310   B_mpi->assembled     = PETSC_FALSE;
4311   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4312   merge->bi            = bi;
4313   merge->bj            = bj;
4314   merge->buf_ri        = buf_ri;
4315   merge->buf_rj        = buf_rj;
4316   merge->coi           = PETSC_NULL;
4317   merge->coj           = PETSC_NULL;
4318   merge->owners_co     = PETSC_NULL;
4319 
4320   /* attach the supporting struct to B_mpi for reuse */
4321   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4322   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4323   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4324   *mpimat = B_mpi;
4325 
4326   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4327   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4328   PetscFunctionReturn(0);
4329 }
4330 
4331 #undef __FUNCT__
4332 #define __FUNCT__ "MatMerge_SeqsToMPI"
4333 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4334 {
4335   PetscErrorCode   ierr;
4336 
4337   PetscFunctionBegin;
4338   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4339   if (scall == MAT_INITIAL_MATRIX){
4340     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4341   }
4342   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4343   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4344   PetscFunctionReturn(0);
4345 }
4346 
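/*
   Usage sketch (illustrative only; seqC, m and n are placeholders for a per-process
   SeqAIJ piece and the desired local sizes of the parallel result):

     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqC,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... update the numerical values of seqC, keeping its nonzero pattern ...
     ierr = MatMerge_SeqsToMPI(comm,seqC,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/
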
4347 #undef __FUNCT__
4348 #define __FUNCT__ "MatGetLocalMat"
4349 /*@
4350      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel matrix
4351 
4352     Not Collective
4353 
4354    Input Parameters:
4355 +    A - the matrix
4356 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4357 
4358    Output Parameter:
4359 .    A_loc - the local sequential matrix generated
4360 
4361     Level: developer
4362 
4363 @*/
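/*
   Usage sketch (illustrative; A is any assembled MPIAIJ matrix):

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc as an ordinary SeqAIJ matrix ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);    refresh the values after A changes
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/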
4364 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4365 {
4366   PetscErrorCode  ierr;
4367   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4368   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4369   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4370   MatScalar       *aa=a->a,*ba=b->a,*cam;
4371   PetscScalar     *ca;
4372   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4373   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4374 
4375   PetscFunctionBegin;
4376   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4377   if (scall == MAT_INITIAL_MATRIX){
4378     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4379     ci[0] = 0;
4380     for (i=0; i<am; i++){
4381       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4382     }
4383     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4384     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4385     k = 0;
4386     for (i=0; i<am; i++) {
4387       ncols_o = bi[i+1] - bi[i];
4388       ncols_d = ai[i+1] - ai[i];
4389       /* off-diagonal portion of A: global columns before the diagonal block */
4390       for (jo=0; jo<ncols_o; jo++) {
4391         col = cmap[*bj];
4392         if (col >= cstart) break;
4393         cj[k]   = col; bj++;
4394         ca[k++] = *ba++;
4395       }
4396       /* diagonal portion of A */
4397       for (j=0; j<ncols_d; j++) {
4398         cj[k]   = cstart + *aj++;
4399         ca[k++] = *aa++;
4400       }
4401       /* off-diagonal portion of A: global columns after the diagonal block */
4402       for (j=jo; j<ncols_o; j++) {
4403         cj[k]   = cmap[*bj++];
4404         ca[k++] = *ba++;
4405       }
4406     }
4407     /* put together the new matrix */
4408     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4409     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4410     /* Since these are PETSc arrays, change flags to free them as necessary. */
4411     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4412     mat->free_a  = PETSC_TRUE;
4413     mat->free_ij = PETSC_TRUE;
4414     mat->nonew   = 0;
4415   } else if (scall == MAT_REUSE_MATRIX){
4416     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4417     ci = mat->i; cj = mat->j; cam = mat->a;
4418     for (i=0; i<am; i++) {
4419       /* off-diagonal portion of A: global columns before the diagonal block */
4420       ncols_o = bi[i+1] - bi[i];
4421       for (jo=0; jo<ncols_o; jo++) {
4422         col = cmap[*bj];
4423         if (col >= cstart) break;
4424         *cam++ = *ba++; bj++;
4425       }
4426       /* diagonal portion of A */
4427       ncols_d = ai[i+1] - ai[i];
4428       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4429       /* off-diagonal portion of A: global columns after the diagonal block */
4430       for (j=jo; j<ncols_o; j++) {
4431         *cam++ = *ba++; bj++;
4432       }
4433     }
4434   } else {
4435     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4436   }
4437 
4438   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4439   PetscFunctionReturn(0);
4440 }
4441 
4442 #undef __FUNCT__
4443 #define __FUNCT__ "MatGetLocalMatCondensed"
4444 /*@C
4445      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and the NON-ZERO columns of a parallel matrix
4446 
4447     Not Collective
4448 
4449    Input Parameters:
4450 +    A - the matrix
4451 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4452 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4453 
4454    Output Parameter:
4455 .    A_loc - the local sequential matrix generated
4456 
4457     Level: developer
4458 
4459 @*/
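/*
   Usage sketch (illustrative; passing PETSC_NULL for row and col lets the routine build
   the index sets from A's local rows and nonzero columns itself):

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/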
4460 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4461 {
4462   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4463   PetscErrorCode    ierr;
4464   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4465   IS                isrowa,iscola;
4466   Mat               *aloc;
4467 
4468   PetscFunctionBegin;
4469   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4470   if (!row){
4471     start = A->rmap->rstart; end = A->rmap->rend;
4472     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4473   } else {
4474     isrowa = *row;
4475   }
4476   if (!col){
4477     start = A->cmap->rstart;
4478     cmap  = a->garray;
4479     nzA   = a->A->cmap->n;
4480     nzB   = a->B->cmap->n;
4481     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4482     ncols = 0;
4483     for (i=0; i<nzB; i++) {
4484       if (cmap[i] < start) idx[ncols++] = cmap[i];
4485       else break;
4486     }
4487     imark = i;
4488     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4489     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4490     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4491     ierr = PetscFree(idx);CHKERRQ(ierr);
4492   } else {
4493     iscola = *col;
4494   }
4495   if (scall != MAT_INITIAL_MATRIX){
4496     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4497     aloc[0] = *A_loc;
4498   }
4499   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4500   *A_loc = aloc[0];
4501   ierr = PetscFree(aloc);CHKERRQ(ierr);
4502   if (!row){
4503     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4504   }
4505   if (!col){
4506     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4507   }
4508   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4509   PetscFunctionReturn(0);
4510 }
4511 
4512 #undef __FUNCT__
4513 #define __FUNCT__ "MatGetBrowsOfAcols"
4514 /*@C
4515     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4516 
4517     Collective on Mat
4518 
4519    Input Parameters:
4520 +    A,B - the matrices in mpiaij format
4521 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4522 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4523 
4524    Output Parameter:
4525 +    rowb, colb - index sets of rows and columns of B to extract
4526 .    brstart - row index of B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq)
4527 -    B_seq - the sequential matrix generated
4528 
4529     Level: developer
4530 
4531 @*/
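/*
   Usage sketch (illustrative): the index sets and B_seq obtained with MAT_INITIAL_MATRIX
   are passed back unchanged for MAT_REUSE_MATRIX,

     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... later, after the values (but not the pattern) of B have changed ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/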
4532 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4533 {
4534   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4535   PetscErrorCode    ierr;
4536   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4537   IS                isrowb,iscolb;
4538   Mat               *bseq;
4539 
4540   PetscFunctionBegin;
4541   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4542     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4543   }
4544   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4545 
4546   if (scall == MAT_INITIAL_MATRIX){
4547     start = A->cmap->rstart;
4548     cmap  = a->garray;
4549     nzA   = a->A->cmap->n;
4550     nzB   = a->B->cmap->n;
4551     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4552     ncols = 0;
4553     for (i=0; i<nzB; i++) {  /* global columns (rows of B) below the local ownership range */
4554       if (cmap[i] < start) idx[ncols++] = cmap[i];
4555       else break;
4556     }
4557     imark = i;
4558     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4559     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global columns above the local ownership range */
4560     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4561     ierr = PetscFree(idx);CHKERRQ(ierr);
4562     *brstart = imark;
4563     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4564   } else {
4565     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4566     isrowb = *rowb; iscolb = *colb;
4567     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4568     bseq[0] = *B_seq;
4569   }
4570   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4571   *B_seq = bseq[0];
4572   ierr = PetscFree(bseq);CHKERRQ(ierr);
4573   if (!rowb){
4574     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4575   } else {
4576     *rowb = isrowb;
4577   }
4578   if (!colb){
4579     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4580   } else {
4581     *colb = iscolb;
4582   }
4583   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4584   PetscFunctionReturn(0);
4585 }
4586 
4587 #undef __FUNCT__
4588 #define __FUNCT__ "MatGetBrowsOfAoCols"
4589 /*@C
4590     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4591     of the OFF-DIAGONAL portion of the local part of A
4592 
4593     Collective on Mat
4594 
4595    Input Parameters:
4596 +    A,B - the matrices in mpiaij format
4597 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4598 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4599 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4600 
4601    Output Parameter:
4602 .    B_oth - the sequential matrix generated
4603 
4604     Level: developer
4605 
4606 @*/
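/*
   Usage sketch (illustrative; startsj and bufa are returned by the first call and kept
   so that the communication pattern can be reused):

     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa    = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ... later, after the values (but not the pattern) of B have changed ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
*/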
4607 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4608 {
4609   VecScatter_MPI_General *gen_to,*gen_from;
4610   PetscErrorCode         ierr;
4611   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4612   Mat_SeqAIJ             *b_oth;
4613   VecScatter             ctx=a->Mvctx;
4614   MPI_Comm               comm=((PetscObject)ctx)->comm;
4615   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4616   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4617   PetscScalar            *rvalues,*svalues;
4618   MatScalar              *b_otha,*bufa,*bufA;
4619   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4620   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4621   MPI_Status             *sstatus,rstatus;
4622   PetscMPIInt            jj;
4623   PetscInt               *cols,sbs,rbs;
4624   PetscScalar            *vals;
4625 
4626   PetscFunctionBegin;
4627   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4628     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4629   }
4630   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4631   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4632 
4633   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4634   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4635   rvalues  = gen_from->values; /* buffer reused to hold the lengths of the rows to be received */
4636   svalues  = gen_to->values;   /* buffer reused to hold the lengths of the rows to be sent */
4637   nrecvs   = gen_from->n;
4638   nsends   = gen_to->n;
4639 
4640   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4641   srow     = gen_to->indices;   /* local row index to be sent */
4642   sstarts  = gen_to->starts;
4643   sprocs   = gen_to->procs;
4644   sstatus  = gen_to->sstatus;
4645   sbs      = gen_to->bs;
4646   rstarts  = gen_from->starts;
4647   rprocs   = gen_from->procs;
4648   rbs      = gen_from->bs;
4649 
4650   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4651   if (scall == MAT_INITIAL_MATRIX){
4652     /* i-array */
4653     /*---------*/
4654     /*  post receives */
4655     for (i=0; i<nrecvs; i++){
4656       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4657       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4658       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4659     }
4660 
4661     /* pack the outgoing message */
4662     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4663     rstartsj = sstartsj + nsends +1;
4664     sstartsj[0] = 0;  rstartsj[0] = 0;
4665     len = 0; /* total length of j or a array to be sent */
4666     k = 0;
4667     for (i=0; i<nsends; i++){
4668       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4669       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4670       for (j=0; j<nrows; j++) {
4671         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4672         for (l=0; l<sbs; l++){
4673           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4674           rowlen[j*sbs+l] = ncols;
4675           len += ncols;
4676           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4677         }
4678         k++;
4679       }
4680       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4681       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4682     }
4683     /* recvs and sends of i-array are completed */
4684     i = nrecvs;
4685     while (i--) {
4686       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4687     }
4688     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4689 
4690     /* allocate buffers for sending j and a arrays */
4691     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4692     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4693 
4694     /* create i-array of B_oth */
4695     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4696     b_othi[0] = 0;
4697     len = 0; /* total length of j or a array to be received */
4698     k = 0;
4699     for (i=0; i<nrecvs; i++){
4700       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4701       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4702       for (j=0; j<nrows; j++) {
4703         b_othi[k+1] = b_othi[k] + rowlen[j];
4704         len += rowlen[j]; k++;
4705       }
4706       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4707     }
4708 
4709     /* allocate space for j and a arrays of B_oth */
4710     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4711     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4712 
4713     /* j-array */
4714     /*---------*/
4715     /*  post receives of j-array */
4716     for (i=0; i<nrecvs; i++){
4717       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4718       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4719     }
4720 
4721     /* pack the outgoing message j-array */
4722     k = 0;
4723     for (i=0; i<nsends; i++){
4724       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4725       bufJ = bufj+sstartsj[i];
4726       for (j=0; j<nrows; j++) {
4727         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4728         for (ll=0; ll<sbs; ll++){
4729           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4730           for (l=0; l<ncols; l++){
4731             *bufJ++ = cols[l];
4732           }
4733           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4734         }
4735       }
4736       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4737     }
4738 
4739     /* recvs and sends of j-array are completed */
4740     i = nrecvs;
4741     while (i--) {
4742       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4743     }
4744     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4745   } else if (scall == MAT_REUSE_MATRIX){
4746     sstartsj = *startsj;
4747     rstartsj = sstartsj + nsends +1;
4748     bufa     = *bufa_ptr;
4749     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4750     b_otha   = b_oth->a;
4751   } else {
4752     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4753   }
4754 
4755   /* a-array */
4756   /*---------*/
4757   /*  post receives of a-array */
4758   for (i=0; i<nrecvs; i++){
4759     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4760     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4761   }
4762 
4763   /* pack the outgoing message a-array */
4764   k = 0;
4765   for (i=0; i<nsends; i++){
4766     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4767     bufA = bufa+sstartsj[i];
4768     for (j=0; j<nrows; j++) {
4769       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4770       for (ll=0; ll<sbs; ll++){
4771         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4772         for (l=0; l<ncols; l++){
4773           *bufA++ = vals[l];
4774         }
4775         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4776       }
4777     }
4778     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4779   }
4780   /* recvs and sends of a-array are completed */
4781   i = nrecvs;
4782   while (i--) {
4783     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4784   }
4785   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4786   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4787 
4788   if (scall == MAT_INITIAL_MATRIX){
4789     /* put together the new matrix */
4790     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4791 
4792     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4793     /* Since these are PETSc arrays, change flags to free them as necessary. */
4794     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4795     b_oth->free_a  = PETSC_TRUE;
4796     b_oth->free_ij = PETSC_TRUE;
4797     b_oth->nonew   = 0;
4798 
4799     ierr = PetscFree(bufj);CHKERRQ(ierr);
4800     if (!startsj || !bufa_ptr){
4801       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4802       ierr = PetscFree(bufa);CHKERRQ(ierr);
4803     } else {
4804       *startsj  = sstartsj;
4805       *bufa_ptr = bufa;
4806     }
4807   }
4808   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4809   PetscFunctionReturn(0);
4810 }
4811 
4812 #undef __FUNCT__
4813 #define __FUNCT__ "MatGetCommunicationStructs"
4814 /*@C
4815   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4816 
4817   Not Collective
4818 
4819   Input Parameter:
4820 . A - The matrix in mpiaij format
4821 
4822   Output Parameters:
4823 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4824 . colmap - A map from global column index to local index into lvec
4825 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4826 
4827   Level: developer
4828 
4829 @*/
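/*
   Usage sketch (illustrative; with PETSC_USE_CTABLE the colmap argument is a PetscTable*
   rather than a PetscInt**):

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/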
4830 #if defined (PETSC_USE_CTABLE)
4831 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4832 #else
4833 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4834 #endif
4835 {
4836   Mat_MPIAIJ *a;
4837 
4838   PetscFunctionBegin;
4839   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4840   PetscValidPointer(lvec, 2);
4841   PetscValidPointer(colmap, 3);
4842   PetscValidPointer(multScatter, 4);
4843   a = (Mat_MPIAIJ *) A->data;
4844   if (lvec) *lvec = a->lvec;
4845   if (colmap) *colmap = a->colmap;
4846   if (multScatter) *multScatter = a->Mvctx;
4847   PetscFunctionReturn(0);
4848 }
4849 
4850 EXTERN_C_BEGIN
4851 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4852 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4853 EXTERN_C_END
4854 
4855 #include "../src/mat/impls/dense/mpi/mpidense.h"
4856 
4857 #undef __FUNCT__
4858 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4859 /*
4860     Computes C = A*B as (B'*A')', since computing the dense-times-sparse product A*B directly is untenable
4861 
4862                n                       p                          p
4863         (              )       (              )         (                  )
4864       m (      A       )  *  n (       B      )   =   m (         C        )
4865         (              )       (              )         (                  )
4866 
4867 */
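/*
   The identity used is C = A*B = ((A*B)^T)^T = (B^T * A^T)^T, so the product is formed
   with the sparse matrix as the left operand, which is the combination MatMatMult()
   below can handle; three transposes are then the only extra cost.
*/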
4868 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4869 {
4870   PetscErrorCode     ierr;
4871   Mat                At,Bt,Ct;
4872 
4873   PetscFunctionBegin;
4874   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4875   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4876   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4877   ierr = MatDestroy(At);CHKERRQ(ierr);
4878   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4879   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4880   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4881   PetscFunctionReturn(0);
4882 }
4883 
4884 #undef __FUNCT__
4885 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4886 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4887 {
4888   PetscErrorCode ierr;
4889   PetscInt       m=A->rmap->n,n=B->cmap->n;
4890   Mat            Cmat;
4891 
4892   PetscFunctionBegin;
4893   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4894   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4895   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4896   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4897   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4898   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4899   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4900   *C   = Cmat;
4901   PetscFunctionReturn(0);
4902 }
4903 
4904 /* ----------------------------------------------------------------*/
4905 #undef __FUNCT__
4906 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4907 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4908 {
4909   PetscErrorCode ierr;
4910 
4911   PetscFunctionBegin;
4912   if (scall == MAT_INITIAL_MATRIX){
4913     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4914   }
4915   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4916   PetscFunctionReturn(0);
4917 }
4918 
4919 EXTERN_C_BEGIN
4920 #if defined(PETSC_HAVE_MUMPS)
4921 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4922 #endif
4923 #if defined(PETSC_HAVE_PASTIX)
4924 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4925 #endif
4926 #if defined(PETSC_HAVE_SUPERLU_DIST)
4927 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4928 #endif
4929 #if defined(PETSC_HAVE_SPOOLES)
4930 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4931 #endif
4932 EXTERN_C_END
4933 
4934 /*MC
4935    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4936 
4937    Options Database Keys:
4938 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4939 
4940   Level: beginner
4941 
4942 .seealso: MatCreateMPIAIJ()
4943 M*/
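/*
   Usage sketch (illustrative; m,n,M,N,d_nz,o_nz are placeholders): the type is usually
   selected from the options database,

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                        run with -mat_type mpiaij
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);

   or set directly with MatSetType(A,MATMPIAIJ).
*/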
4944 
4945 EXTERN_C_BEGIN
4946 #undef __FUNCT__
4947 #define __FUNCT__ "MatCreate_MPIAIJ"
4948 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4949 {
4950   Mat_MPIAIJ     *b;
4951   PetscErrorCode ierr;
4952   PetscMPIInt    size;
4953 
4954   PetscFunctionBegin;
4955   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4956 
4957   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4958   B->data         = (void*)b;
4959   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4960   B->rmap->bs      = 1;
4961   B->assembled    = PETSC_FALSE;
4962   B->mapping      = 0;
4963 
4964   B->insertmode      = NOT_SET_VALUES;
4965   b->size            = size;
4966   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4967 
4968   /* build cache for off array entries formed */
4969   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4970   b->donotstash  = PETSC_FALSE;
4971   b->colmap      = 0;
4972   b->garray      = 0;
4973   b->roworiented = PETSC_TRUE;
4974 
4975   /* stuff used for matrix vector multiply */
4976   b->lvec      = PETSC_NULL;
4977   b->Mvctx     = PETSC_NULL;
4978 
4979   /* stuff for MatGetRow() */
4980   b->rowindices   = 0;
4981   b->rowvalues    = 0;
4982   b->getrowactive = PETSC_FALSE;
4983 
4984 #if defined(PETSC_HAVE_SPOOLES)
4985   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
4986                                      "MatGetFactor_mpiaij_spooles",
4987                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
4988 #endif
4989 #if defined(PETSC_HAVE_MUMPS)
4990   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
4991                                      "MatGetFactor_mpiaij_mumps",
4992                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
4993 #endif
4994 #if defined(PETSC_HAVE_PASTIX)
4995   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
4996 					   "MatGetFactor_mpiaij_pastix",
4997 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
4998 #endif
4999 #if defined(PETSC_HAVE_SUPERLU_DIST)
5000   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
5001                                      "MatGetFactor_mpiaij_superlu_dist",
5002                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5003 #endif
5004   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5005                                      "MatStoreValues_MPIAIJ",
5006                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5007   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5008                                      "MatRetrieveValues_MPIAIJ",
5009                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5010   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5011 				     "MatGetDiagonalBlock_MPIAIJ",
5012                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5013   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5014 				     "MatIsTranspose_MPIAIJ",
5015 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5016   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5017 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5018 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5019   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5020 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5021 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5022   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5023 				     "MatDiagonalScaleLocal_MPIAIJ",
5024 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5025   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5026                                      "MatConvert_MPIAIJ_MPICSRPERM",
5027                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5028   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5029                                      "MatConvert_MPIAIJ_MPICRL",
5030                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5031   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5032                                      "MatMatMult_MPIDense_MPIAIJ",
5033                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5034   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5035                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5036                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5037   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5038                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5039                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5040   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5041   PetscFunctionReturn(0);
5042 }
5043 EXTERN_C_END
5044 
5045 #undef __FUNCT__
5046 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5047 /*@
5048      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5049          and "off-diagonal" part of the matrix in CSR format.
5050 
5051    Collective on MPI_Comm
5052 
5053    Input Parameters:
5054 +  comm - MPI communicator
5055 .  m - number of local rows (Cannot be PETSC_DECIDE)
5056 .  n - This value should be the same as the local size used in creating the
5057        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
5058        calculated if N is given) For square matrices n is almost always m.
5059 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5060 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5061 .   i - row indices for "diagonal" portion of matrix
5062 .   j - column indices
5063 .   a - matrix values
5064 .   oi - row indices for "off-diagonal" portion of matrix
5065 .   oj - column indices
5066 -   oa - matrix values
5067 
5068    Output Parameter:
5069 .   mat - the matrix
5070 
5071    Level: advanced
5072 
5073    Notes:
5074        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5075 
5076        The i and j indices are 0 based
5077 
5078        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5079 
5080 
5081 .keywords: matrix, aij, compressed row, sparse, parallel
5082 
5083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5084           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5085 @*/
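/*
   Illustrative sketch (not from the PETSc manual): on rank 0 of a 2-process run that owns
   rows 0-1 and columns 0-1 of a 4x4 matrix with entries
       row 0:  A(0,0)=1  A(0,1)=2  A(0,2)=4
       row 1:  A(1,1)=3
   the 2x2 "diagonal" block and the remainder are passed as

     PetscInt    i[]  = {0,2,3}, j[]  = {0,1,1};  PetscScalar a[]  = {1.0,2.0,3.0};
     PetscInt    oi[] = {0,1,1}, oj[] = {2};      PetscScalar oa[] = {4.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   The off-diagonal column indices oj are global, matching the MatCreateSeqAIJWithArrays()
   call in the body that builds the off-diagonal block with the full global column width.
*/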
5086 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5087 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5088 {
5089   PetscErrorCode ierr;
5090   Mat_MPIAIJ     *maij;
5091 
5092   PetscFunctionBegin;
5093   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5094   if (i[0]) {
5095     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5096   }
5097   if (oi[0]) {
5098     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5099   }
5100   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5101   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5102   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5103   maij = (Mat_MPIAIJ*) (*mat)->data;
5104   maij->donotstash     = PETSC_TRUE;
5105   (*mat)->preallocated = PETSC_TRUE;
5106 
5107   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5108   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5109   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5110   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5111 
5112   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5113   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5114 
5115   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5116   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5117   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5118   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5119 
5120   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5121   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5122   PetscFunctionReturn(0);
5123 }
5124 
5125 /*
5126     Special version for direct calls from Fortran
5127 */
5128 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5129 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5130 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5131 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5132 #endif
5133 
5134 /* Change these macros so they can be used in a void function */
5135 #undef CHKERRQ
5136 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5137 #undef SETERRQ2
5138 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5139 #undef SETERRQ
5140 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5141 
5142 EXTERN_C_BEGIN
5143 #undef __FUNCT__
5144 #define __FUNCT__ "matsetvaluesmpiaij_"
5145 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5146 {
5147   Mat             mat = *mmat;
5148   PetscInt        m = *mm, n = *mn;
5149   InsertMode      addv = *maddv;
5150   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5151   PetscScalar     value;
5152   PetscErrorCode  ierr;
5153 
5154   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5155   if (mat->insertmode == NOT_SET_VALUES) {
5156     mat->insertmode = addv;
5157   }
5158 #if defined(PETSC_USE_DEBUG)
5159   else if (mat->insertmode != addv) {
5160     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5161   }
5162 #endif
5163   {
5164   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5165   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5166   PetscTruth      roworiented = aij->roworiented;
5167 
5168   /* Some Variables required in the macro */
5169   Mat             A = aij->A;
5170   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5171   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5172   MatScalar       *aa = a->a;
5173   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5174   Mat             B = aij->B;
5175   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5176   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5177   MatScalar       *ba = b->a;
5178 
5179   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5180   PetscInt        nonew = a->nonew;
5181   MatScalar       *ap1,*ap2;
5182 
5183   PetscFunctionBegin;
5184   for (i=0; i<m; i++) {
5185     if (im[i] < 0) continue;
5186 #if defined(PETSC_USE_DEBUG)
5187     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5188 #endif
5189     if (im[i] >= rstart && im[i] < rend) {
5190       row      = im[i] - rstart;
5191       lastcol1 = -1;
5192       rp1      = aj + ai[row];
5193       ap1      = aa + ai[row];
5194       rmax1    = aimax[row];
5195       nrow1    = ailen[row];
5196       low1     = 0;
5197       high1    = nrow1;
5198       lastcol2 = -1;
5199       rp2      = bj + bi[row];
5200       ap2      = ba + bi[row];
5201       rmax2    = bimax[row];
5202       nrow2    = bilen[row];
5203       low2     = 0;
5204       high2    = nrow2;
5205 
5206       for (j=0; j<n; j++) {
5207         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5208         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5209         if (in[j] >= cstart && in[j] < cend){
5210           col = in[j] - cstart;
5211           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5212         } else if (in[j] < 0) continue;
5213 #if defined(PETSC_USE_DEBUG)
5214         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5215 #endif
5216         else {
5217           if (mat->was_assembled) {
5218             if (!aij->colmap) {
5219               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5220             }
5221 #if defined (PETSC_USE_CTABLE)
5222             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5223 	    col--;
5224 #else
5225             col = aij->colmap[in[j]] - 1;
5226 #endif
5227             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5228               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5229               col =  in[j];
5230               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5231               B = aij->B;
5232               b = (Mat_SeqAIJ*)B->data;
5233               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5234               rp2      = bj + bi[row];
5235               ap2      = ba + bi[row];
5236               rmax2    = bimax[row];
5237               nrow2    = bilen[row];
5238               low2     = 0;
5239               high2    = nrow2;
5240               bm       = aij->B->rmap->n;
5241               ba = b->a;
5242             }
5243           } else col = in[j];
5244           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5245         }
5246       }
5247     } else {
5248       if (!aij->donotstash) {
5249         if (roworiented) {
5250           if (ignorezeroentries && v[i*n] == 0.0) continue;
5251           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5252         } else {
5253           if (ignorezeroentries && v[i] == 0.0) continue;
5254           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5255         }
5256       }
5257     }
5258   }}
5259   PetscFunctionReturnVoid();
5260 }
5261 EXTERN_C_END
5262 
5263