xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 85911e72939a499ddf0b06e7bd6e9f892f6cb6ab)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "../src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine number diagonal and off-diagonal counts */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine number diagonal and off-diagonal counts */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0*/
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it it is not scalable (each processor
181 has an order N integer array but is fast to acess.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some Variables required in the macro */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
415   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
456 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
457 {
458   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
459   PetscErrorCode ierr;
460   PetscInt       nstash,reallocs;
461   InsertMode     addv;
462 
463   PetscFunctionBegin;
464   if (aij->donotstash) {
465     PetscFunctionReturn(0);
466   }
467 
468   /* make sure all processors are either in INSERTMODE or ADDMODE */
469   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
470   if (addv == (ADD_VALUES|INSERT_VALUES)) {
471     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
472   }
473   mat->insertmode = addv; /* in case this processor had no cache */
474 
475   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
476   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
477   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
478   PetscFunctionReturn(0);
479 }
480 
481 #undef __FUNCT__
482 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
483 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
484 {
485   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
486   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
487   PetscErrorCode ierr;
488   PetscMPIInt    n;
489   PetscInt       i,j,rstart,ncols,flg;
490   PetscInt       *row,*col;
491   PetscTruth     other_disassembled;
492   PetscScalar    *val;
493   InsertMode     addv = mat->insertmode;
494 
495   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
496   PetscFunctionBegin;
497   if (!aij->donotstash) {
498     while (1) {
499       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
500       if (!flg) break;
501 
502       for (i=0; i<n;) {
503         /* Now identify the consecutive vals belonging to the same row */
504         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
505         if (j < n) ncols = j-i;
506         else       ncols = n-i;
507         /* Now assemble all these values with a single function call */
508         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
509         i = j;
510       }
511     }
512     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
513   }
514   a->compressedrow.use     = PETSC_FALSE;
515   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
516   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
517 
518   /* determine if any processor has disassembled, if so we must
519      also disassemble ourselfs, in order that we may reassemble. */
520   /*
521      if nonzero structure of submatrix B cannot change then we know that
522      no processor disassembled thus we can skip this stuff
523   */
524   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
525     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
526     if (mat->was_assembled && !other_disassembled) {
527       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
528     }
529   }
530   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
531     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
532   }
533   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
534   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
535   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
536   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
537 
538   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
539   aij->rowvalues = 0;
540 
541   /* used by MatAXPY() */
542   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
543   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
544 
545   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
546   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
547   PetscFunctionReturn(0);
548 }
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
552 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
553 {
554   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
555   PetscErrorCode ierr;
556 
557   PetscFunctionBegin;
558   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
559   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
560   PetscFunctionReturn(0);
561 }
562 
563 #undef __FUNCT__
564 #define __FUNCT__ "MatZeroRows_MPIAIJ"
565 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
566 {
567   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
568   PetscErrorCode ierr;
569   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
570   PetscInt       i,*owners = A->rmap->range;
571   PetscInt       *nprocs,j,idx,nsends,row;
572   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
573   PetscInt       *rvalues,count,base,slen,*source;
574   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
575   MPI_Comm       comm = ((PetscObject)A)->comm;
576   MPI_Request    *send_waits,*recv_waits;
577   MPI_Status     recv_status,*send_status;
578 #if defined(PETSC_DEBUG)
579   PetscTruth     found = PETSC_FALSE;
580 #endif
581 
582   PetscFunctionBegin;
583   /*  first count number of contributors to each processor */
584   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
585   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
586   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
587   j = 0;
588   for (i=0; i<N; i++) {
589     if (lastidx > (idx = rows[i])) j = 0;
590     lastidx = idx;
591     for (; j<size; j++) {
592       if (idx >= owners[j] && idx < owners[j+1]) {
593         nprocs[2*j]++;
594         nprocs[2*j+1] = 1;
595         owner[i] = j;
596 #if defined(PETSC_DEBUG)
597         found = PETSC_TRUE;
598 #endif
599         break;
600       }
601     }
602 #if defined(PETSC_DEBUG)
603     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
604     found = PETSC_FALSE;
605 #endif
606   }
607   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
608 
609   /* inform other processors of number of messages and max length*/
610   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
611 
612   /* post receives:   */
613   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
614   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
615   for (i=0; i<nrecvs; i++) {
616     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
617   }
618 
619   /* do sends:
620       1) starts[i] gives the starting index in svalues for stuff going to
621          the ith processor
622   */
623   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
624   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
625   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
626   starts[0] = 0;
627   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
628   for (i=0; i<N; i++) {
629     svalues[starts[owner[i]]++] = rows[i];
630   }
631 
632   starts[0] = 0;
633   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
634   count = 0;
635   for (i=0; i<size; i++) {
636     if (nprocs[2*i+1]) {
637       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
638     }
639   }
640   ierr = PetscFree(starts);CHKERRQ(ierr);
641 
642   base = owners[rank];
643 
644   /*  wait on receives */
645   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
646   source = lens + nrecvs;
647   count  = nrecvs; slen = 0;
648   while (count) {
649     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
650     /* unpack receives into our local space */
651     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
652     source[imdex]  = recv_status.MPI_SOURCE;
653     lens[imdex]    = n;
654     slen          += n;
655     count--;
656   }
657   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
658 
659   /* move the data into the send scatter */
660   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
661   count = 0;
662   for (i=0; i<nrecvs; i++) {
663     values = rvalues + i*nmax;
664     for (j=0; j<lens[i]; j++) {
665       lrows[count++] = values[j] - base;
666     }
667   }
668   ierr = PetscFree(rvalues);CHKERRQ(ierr);
669   ierr = PetscFree(lens);CHKERRQ(ierr);
670   ierr = PetscFree(owner);CHKERRQ(ierr);
671   ierr = PetscFree(nprocs);CHKERRQ(ierr);
672 
673   /* actually zap the local rows */
674   /*
675         Zero the required rows. If the "diagonal block" of the matrix
676      is square and the user wishes to set the diagonal we use separate
677      code so that MatSetValues() is not called for each diagonal allocating
678      new memory, thus calling lots of mallocs and slowing things down.
679 
680        Contributed by: Matthew Knepley
681   */
682   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
683   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
684   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
685     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
686   } else if (diag != 0.0) {
687     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
688     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
689       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
690 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
691     }
692     for (i = 0; i < slen; i++) {
693       row  = lrows[i] + rstart;
694       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
695     }
696     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
697     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
698   } else {
699     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
700   }
701   ierr = PetscFree(lrows);CHKERRQ(ierr);
702 
703   /* wait on sends */
704   if (nsends) {
705     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
706     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
707     ierr = PetscFree(send_status);CHKERRQ(ierr);
708   }
709   ierr = PetscFree(send_waits);CHKERRQ(ierr);
710   ierr = PetscFree(svalues);CHKERRQ(ierr);
711 
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatMult_MPIAIJ"
717 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
718 {
719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
720   PetscErrorCode ierr;
721   PetscInt       nt;
722 
723   PetscFunctionBegin;
724   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
725   if (nt != A->cmap->n) {
726     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
727   }
728   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
729   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
730   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
731   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
732   PetscFunctionReturn(0);
733 }
734 
735 #undef __FUNCT__
736 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
737 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
738 {
739   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
740   PetscErrorCode ierr;
741 
742   PetscFunctionBegin;
743   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
744   PetscFunctionReturn(0);
745 }
746 
747 #undef __FUNCT__
748 #define __FUNCT__ "MatMultAdd_MPIAIJ"
749 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
750 {
751   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
752   PetscErrorCode ierr;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
756   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
757   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
758   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
764 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
765 {
766   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
767   PetscErrorCode ierr;
768   PetscTruth     merged;
769 
770   PetscFunctionBegin;
771   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
772   /* do nondiagonal part */
773   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
774   if (!merged) {
775     /* send it on its way */
776     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
777     /* do local part */
778     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
779     /* receive remote parts: note this assumes the values are not actually */
780     /* added in yy until the next line, */
781     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
782   } else {
783     /* do local part */
784     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
785     /* send it on its way */
786     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
787     /* values actually were received in the Begin() but we need to call this nop */
788     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
789   }
790   PetscFunctionReturn(0);
791 }
792 
793 EXTERN_C_BEGIN
794 #undef __FUNCT__
795 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
796 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
797 {
798   MPI_Comm       comm;
799   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
800   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
801   IS             Me,Notme;
802   PetscErrorCode ierr;
803   PetscInt       M,N,first,last,*notme,i;
804   PetscMPIInt    size;
805 
806   PetscFunctionBegin;
807 
808   /* Easy test: symmetric diagonal block */
809   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
810   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
811   if (!*f) PetscFunctionReturn(0);
812   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
813   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
814   if (size == 1) PetscFunctionReturn(0);
815 
816   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
817   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
818   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
819   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
820   for (i=0; i<first; i++) notme[i] = i;
821   for (i=last; i<M; i++) notme[i-last+first] = i;
822   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
823   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
824   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
825   Aoff = Aoffs[0];
826   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
827   Boff = Boffs[0];
828   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
829   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
830   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
831   ierr = ISDestroy(Me);CHKERRQ(ierr);
832   ierr = ISDestroy(Notme);CHKERRQ(ierr);
833 
834   PetscFunctionReturn(0);
835 }
836 EXTERN_C_END
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
840 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
841 {
842   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
843   PetscErrorCode ierr;
844 
845   PetscFunctionBegin;
846   /* do nondiagonal part */
847   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
848   /* send it on its way */
849   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
850   /* do local part */
851   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
852   /* receive remote parts */
853   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
854   PetscFunctionReturn(0);
855 }
856 
857 /*
858   This only works correctly for square matrices where the subblock A->A is the
859    diagonal block
860 */
861 #undef __FUNCT__
862 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
863 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
864 {
865   PetscErrorCode ierr;
866   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
867 
868   PetscFunctionBegin;
869   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
870   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
871     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
872   }
873   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
874   PetscFunctionReturn(0);
875 }
876 
877 #undef __FUNCT__
878 #define __FUNCT__ "MatScale_MPIAIJ"
879 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
880 {
881   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
882   PetscErrorCode ierr;
883 
884   PetscFunctionBegin;
885   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
886   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
887   PetscFunctionReturn(0);
888 }
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatDestroy_MPIAIJ"
892 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
893 {
894   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
895   PetscErrorCode ierr;
896 
897   PetscFunctionBegin;
898 #if defined(PETSC_USE_LOG)
899   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
900 #endif
901   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
902   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
903   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
904   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
905 #if defined (PETSC_USE_CTABLE)
906   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
907 #else
908   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
909 #endif
910   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
911   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
912   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
913   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
914   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
915   ierr = PetscFree(aij);CHKERRQ(ierr);
916 
917   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
922   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
923   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
924   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatView_MPIAIJ_Binary"
930 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
931 {
932   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
933   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
934   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
935   PetscErrorCode    ierr;
936   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
937   int               fd;
938   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
939   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
940   PetscScalar       *column_values;
941 
942   PetscFunctionBegin;
943   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
944   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
945   nz   = A->nz + B->nz;
946   if (!rank) {
947     header[0] = MAT_FILE_COOKIE;
948     header[1] = mat->rmap->N;
949     header[2] = mat->cmap->N;
950     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
951     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
952     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
953     /* get largest number of rows any processor has */
954     rlen = mat->rmap->n;
955     range = mat->rmap->range;
956     for (i=1; i<size; i++) {
957       rlen = PetscMax(rlen,range[i+1] - range[i]);
958     }
959   } else {
960     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
961     rlen = mat->rmap->n;
962   }
963 
964   /* load up the local row counts */
965   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
966   for (i=0; i<mat->rmap->n; i++) {
967     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
968   }
969 
970   /* store the row lengths to the file */
971   if (!rank) {
972     MPI_Status status;
973     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
974     for (i=1; i<size; i++) {
975       rlen = range[i+1] - range[i];
976       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
977       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
978     }
979   } else {
980     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
981   }
982   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
983 
984   /* load up the local column indices */
985   nzmax = nz; /* )th processor needs space a largest processor needs */
986   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
987   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
988   cnt  = 0;
989   for (i=0; i<mat->rmap->n; i++) {
990     for (j=B->i[i]; j<B->i[i+1]; j++) {
991       if ( (col = garray[B->j[j]]) > cstart) break;
992       column_indices[cnt++] = col;
993     }
994     for (k=A->i[i]; k<A->i[i+1]; k++) {
995       column_indices[cnt++] = A->j[k] + cstart;
996     }
997     for (; j<B->i[i+1]; j++) {
998       column_indices[cnt++] = garray[B->j[j]];
999     }
1000   }
1001   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1002 
1003   /* store the column indices to the file */
1004   if (!rank) {
1005     MPI_Status status;
1006     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1007     for (i=1; i<size; i++) {
1008       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1009       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1010       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1011       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1012     }
1013   } else {
1014     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1015     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1016   }
1017   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1018 
1019   /* load up the local column values */
1020   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1021   cnt  = 0;
1022   for (i=0; i<mat->rmap->n; i++) {
1023     for (j=B->i[i]; j<B->i[i+1]; j++) {
1024       if ( garray[B->j[j]] > cstart) break;
1025       column_values[cnt++] = B->a[j];
1026     }
1027     for (k=A->i[i]; k<A->i[i+1]; k++) {
1028       column_values[cnt++] = A->a[k];
1029     }
1030     for (; j<B->i[i+1]; j++) {
1031       column_values[cnt++] = B->a[j];
1032     }
1033   }
1034   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1035 
1036   /* store the column values to the file */
1037   if (!rank) {
1038     MPI_Status status;
1039     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1040     for (i=1; i<size; i++) {
1041       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1042       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1043       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1044       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1045     }
1046   } else {
1047     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1048     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1049   }
1050   ierr = PetscFree(column_values);CHKERRQ(ierr);
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1056 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1057 {
1058   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1059   PetscErrorCode    ierr;
1060   PetscMPIInt       rank = aij->rank,size = aij->size;
1061   PetscTruth        isdraw,iascii,isbinary;
1062   PetscViewer       sviewer;
1063   PetscViewerFormat format;
1064 
1065   PetscFunctionBegin;
1066   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1067   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1068   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1069   if (iascii) {
1070     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1071     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1072       MatInfo    info;
1073       PetscTruth inodes;
1074 
1075       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1076       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1077       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1078       if (!inodes) {
1079         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1080 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1083 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1084       }
1085       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1086       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1087       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1088       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1089       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1090       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1091       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1092       PetscFunctionReturn(0);
1093     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1094       PetscInt   inodecount,inodelimit,*inodes;
1095       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1096       if (inodes) {
1097         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1098       } else {
1099         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1100       }
1101       PetscFunctionReturn(0);
1102     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1103       PetscFunctionReturn(0);
1104     }
1105   } else if (isbinary) {
1106     if (size == 1) {
1107       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1108       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1109     } else {
1110       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1111     }
1112     PetscFunctionReturn(0);
1113   } else if (isdraw) {
1114     PetscDraw  draw;
1115     PetscTruth isnull;
1116     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1117     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1118   }
1119 
1120   if (size == 1) {
1121     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1122     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1123   } else {
1124     /* assemble the entire matrix onto first processor. */
1125     Mat         A;
1126     Mat_SeqAIJ  *Aloc;
1127     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1128     MatScalar   *a;
1129 
1130     if (mat->rmap->N > 1024) {
1131       PetscTruth flg = PETSC_FALSE;
1132 
1133       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1134       if (!flg) {
1135         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1136       }
1137     }
1138 
1139     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1140     if (!rank) {
1141       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1142     } else {
1143       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1144     }
1145     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1146     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1147     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1148     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1149 
1150     /* copy over the A part */
1151     Aloc = (Mat_SeqAIJ*)aij->A->data;
1152     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1153     row = mat->rmap->rstart;
1154     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1155     for (i=0; i<m; i++) {
1156       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1157       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1158     }
1159     aj = Aloc->j;
1160     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1161 
1162     /* copy over the B part */
1163     Aloc = (Mat_SeqAIJ*)aij->B->data;
1164     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1165     row  = mat->rmap->rstart;
1166     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1167     ct   = cols;
1168     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1169     for (i=0; i<m; i++) {
1170       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1171       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1172     }
1173     ierr = PetscFree(ct);CHKERRQ(ierr);
1174     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1175     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1176     /*
1177        Everyone has to call to draw the matrix since the graphics waits are
1178        synchronized across all processors that share the PetscDraw object
1179     */
1180     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1181     if (!rank) {
1182       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1183       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1184     }
1185     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1186     ierr = MatDestroy(A);CHKERRQ(ierr);
1187   }
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatView_MPIAIJ"
1193 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1194 {
1195   PetscErrorCode ierr;
1196   PetscTruth     iascii,isdraw,issocket,isbinary;
1197 
1198   PetscFunctionBegin;
1199   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1200   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1201   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1202   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1203   if (iascii || isdraw || isbinary || issocket) {
1204     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1205   } else {
1206     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1207   }
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatRelax_MPIAIJ"
1213 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1214 {
1215   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1216   PetscErrorCode ierr;
1217   Vec            bb1 = 0;
1218   PetscTruth     hasop;
1219 
1220   PetscFunctionBegin;
1221   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1222     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1223   }
1224 
1225   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1226     if (flag & SOR_ZERO_INITIAL_GUESS) {
1227       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1228       its--;
1229     }
1230 
1231     while (its--) {
1232       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1233       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1234 
1235       /* update rhs: bb1 = bb - B*x */
1236       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1237       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1238 
1239       /* local sweep */
1240       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1241     }
1242   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1243     if (flag & SOR_ZERO_INITIAL_GUESS) {
1244       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1245       its--;
1246     }
1247     while (its--) {
1248       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1249       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1250 
1251       /* update rhs: bb1 = bb - B*x */
1252       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1253       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1254 
1255       /* local sweep */
1256       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1257     }
1258   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1259     if (flag & SOR_ZERO_INITIAL_GUESS) {
1260       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1261       its--;
1262     }
1263     while (its--) {
1264       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1266 
1267       /* update rhs: bb1 = bb - B*x */
1268       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1269       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1270 
1271       /* local sweep */
1272       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1273     }
1274   }  else if (flag & SOR_EISENSTAT) {
1275     Vec         xx1;
1276 
1277     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1278     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1279 
1280     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1281     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1282     if (!mat->diag) {
1283       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1284       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1285     }
1286     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1287     if (hasop) {
1288       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1289     } else {
1290       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1291     }
1292     ierr = VecWAXPY(bb1,-1.0,bb1,bb);CHKERRQ(ierr);
1293     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1294 
1295     /* local sweep */
1296     ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1297     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1298     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1299   } else {
1300     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1301   }
1302 
1303   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatPermute_MPIAIJ"
1309 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1310 {
1311   MPI_Comm       comm,pcomm;
1312   PetscInt       first,local_size,nrows;
1313   const PetscInt *rows;
1314   PetscMPIInt    size;
1315   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1316   PetscErrorCode ierr;
1317 
1318   PetscFunctionBegin;
1319   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1320   /* make a collective version of 'rowp' */
1321   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1322   if (pcomm==comm) {
1323     crowp = rowp;
1324   } else {
1325     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1326     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1327     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1328     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1329   }
1330   /* collect the global row permutation and invert it */
1331   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1332   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1333   if (pcomm!=comm) {
1334     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1335   }
1336   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1337   /* get the local target indices */
1338   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1339   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1340   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1341   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1342   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1343   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1344   /* the column permutation is so much easier;
1345      make a local version of 'colp' and invert it */
1346   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1347   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1348   if (size==1) {
1349     lcolp = colp;
1350   } else {
1351     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1352     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1353     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1354   }
1355   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1356   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1357   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1358   if (size>1) {
1359     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1360     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1361   }
1362   /* now we just get the submatrix */
1363   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1364   /* clean up */
1365   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1366   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1372 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1373 {
1374   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1375   Mat            A = mat->A,B = mat->B;
1376   PetscErrorCode ierr;
1377   PetscReal      isend[5],irecv[5];
1378 
1379   PetscFunctionBegin;
1380   info->block_size     = 1.0;
1381   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1382   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1383   isend[3] = info->memory;  isend[4] = info->mallocs;
1384   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1386   isend[3] += info->memory;  isend[4] += info->mallocs;
1387   if (flag == MAT_LOCAL) {
1388     info->nz_used      = isend[0];
1389     info->nz_allocated = isend[1];
1390     info->nz_unneeded  = isend[2];
1391     info->memory       = isend[3];
1392     info->mallocs      = isend[4];
1393   } else if (flag == MAT_GLOBAL_MAX) {
1394     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1395     info->nz_used      = irecv[0];
1396     info->nz_allocated = irecv[1];
1397     info->nz_unneeded  = irecv[2];
1398     info->memory       = irecv[3];
1399     info->mallocs      = irecv[4];
1400   } else if (flag == MAT_GLOBAL_SUM) {
1401     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1402     info->nz_used      = irecv[0];
1403     info->nz_allocated = irecv[1];
1404     info->nz_unneeded  = irecv[2];
1405     info->memory       = irecv[3];
1406     info->mallocs      = irecv[4];
1407   }
1408   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1409   info->fill_ratio_needed = 0;
1410   info->factor_mallocs    = 0;
1411 
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatSetOption_MPIAIJ"
1417 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1418 {
1419   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1420   PetscErrorCode ierr;
1421 
1422   PetscFunctionBegin;
1423   switch (op) {
1424   case MAT_NEW_NONZERO_LOCATIONS:
1425   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1426   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1427   case MAT_KEEP_NONZERO_PATTERN:
1428   case MAT_NEW_NONZERO_LOCATION_ERR:
1429   case MAT_USE_INODES:
1430   case MAT_IGNORE_ZERO_ENTRIES:
1431     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1432     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1433     break;
1434   case MAT_ROW_ORIENTED:
1435     a->roworiented = flg;
1436     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1437     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1438     break;
1439   case MAT_NEW_DIAGONALS:
1440     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1441     break;
1442   case MAT_IGNORE_OFF_PROC_ENTRIES:
1443     a->donotstash = PETSC_TRUE;
1444     break;
1445   case MAT_SYMMETRIC:
1446     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1447     break;
1448   case MAT_STRUCTURALLY_SYMMETRIC:
1449   case MAT_HERMITIAN:
1450   case MAT_SYMMETRY_ETERNAL:
1451     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1452     break;
1453   default:
1454     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatGetRow_MPIAIJ"
1461 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1462 {
1463   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1464   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1465   PetscErrorCode ierr;
1466   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1467   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1468   PetscInt       *cmap,*idx_p;
1469 
1470   PetscFunctionBegin;
1471   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1472   mat->getrowactive = PETSC_TRUE;
1473 
1474   if (!mat->rowvalues && (idx || v)) {
1475     /*
1476         allocate enough space to hold information from the longest row.
1477     */
1478     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1479     PetscInt     max = 1,tmp;
1480     for (i=0; i<matin->rmap->n; i++) {
1481       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1482       if (max < tmp) { max = tmp; }
1483     }
1484     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1485     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1486   }
1487 
1488   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1489   lrow = row - rstart;
1490 
1491   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1492   if (!v)   {pvA = 0; pvB = 0;}
1493   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1494   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1495   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1496   nztot = nzA + nzB;
1497 
1498   cmap  = mat->garray;
1499   if (v  || idx) {
1500     if (nztot) {
1501       /* Sort by increasing column numbers, assuming A and B already sorted */
1502       PetscInt imark = -1;
1503       if (v) {
1504         *v = v_p = mat->rowvalues;
1505         for (i=0; i<nzB; i++) {
1506           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1507           else break;
1508         }
1509         imark = i;
1510         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1511         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1512       }
1513       if (idx) {
1514         *idx = idx_p = mat->rowindices;
1515         if (imark > -1) {
1516           for (i=0; i<imark; i++) {
1517             idx_p[i] = cmap[cworkB[i]];
1518           }
1519         } else {
1520           for (i=0; i<nzB; i++) {
1521             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1522             else break;
1523           }
1524           imark = i;
1525         }
1526         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1527         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1528       }
1529     } else {
1530       if (idx) *idx = 0;
1531       if (v)   *v   = 0;
1532     }
1533   }
1534   *nz = nztot;
1535   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1536   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1542 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1543 {
1544   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1545 
1546   PetscFunctionBegin;
1547   if (!aij->getrowactive) {
1548     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1549   }
1550   aij->getrowactive = PETSC_FALSE;
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MatNorm_MPIAIJ"
1556 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1557 {
1558   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1559   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1560   PetscErrorCode ierr;
1561   PetscInt       i,j,cstart = mat->cmap->rstart;
1562   PetscReal      sum = 0.0;
1563   MatScalar      *v;
1564 
1565   PetscFunctionBegin;
1566   if (aij->size == 1) {
1567     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1568   } else {
1569     if (type == NORM_FROBENIUS) {
1570       v = amat->a;
1571       for (i=0; i<amat->nz; i++) {
1572 #if defined(PETSC_USE_COMPLEX)
1573         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1574 #else
1575         sum += (*v)*(*v); v++;
1576 #endif
1577       }
1578       v = bmat->a;
1579       for (i=0; i<bmat->nz; i++) {
1580 #if defined(PETSC_USE_COMPLEX)
1581         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1582 #else
1583         sum += (*v)*(*v); v++;
1584 #endif
1585       }
1586       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1587       *norm = sqrt(*norm);
1588     } else if (type == NORM_1) { /* max column norm */
1589       PetscReal *tmp,*tmp2;
1590       PetscInt  *jj,*garray = aij->garray;
1591       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1592       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1593       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1594       *norm = 0.0;
1595       v = amat->a; jj = amat->j;
1596       for (j=0; j<amat->nz; j++) {
1597         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1598       }
1599       v = bmat->a; jj = bmat->j;
1600       for (j=0; j<bmat->nz; j++) {
1601         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1602       }
1603       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1604       for (j=0; j<mat->cmap->N; j++) {
1605         if (tmp2[j] > *norm) *norm = tmp2[j];
1606       }
1607       ierr = PetscFree(tmp);CHKERRQ(ierr);
1608       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1609     } else if (type == NORM_INFINITY) { /* max row norm */
1610       PetscReal ntemp = 0.0;
1611       for (j=0; j<aij->A->rmap->n; j++) {
1612         v = amat->a + amat->i[j];
1613         sum = 0.0;
1614         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1615           sum += PetscAbsScalar(*v); v++;
1616         }
1617         v = bmat->a + bmat->i[j];
1618         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1619           sum += PetscAbsScalar(*v); v++;
1620         }
1621         if (sum > ntemp) ntemp = sum;
1622       }
1623       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1624     } else {
1625       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1626     }
1627   }
1628   PetscFunctionReturn(0);
1629 }
1630 
1631 #undef __FUNCT__
1632 #define __FUNCT__ "MatTranspose_MPIAIJ"
1633 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1634 {
1635   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1636   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1637   PetscErrorCode ierr;
1638   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1639   PetscInt       cstart=A->cmap->rstart,ncol;
1640   Mat            B;
1641   MatScalar      *array;
1642 
1643   PetscFunctionBegin;
1644   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1645 
1646   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1647   ai = Aloc->i; aj = Aloc->j;
1648   bi = Bloc->i; bj = Bloc->j;
1649   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1650     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1651     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1652     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1653     for (i=0; i<ai[ma]; i++){
1654       d_nnz[aj[i]] ++;
1655       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1656     }
1657 
1658     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1659     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1660     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1661     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1662     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1663   } else {
1664     B = *matout;
1665   }
1666 
1667   /* copy over the A part */
1668   array = Aloc->a;
1669   row = A->rmap->rstart;
1670   for (i=0; i<ma; i++) {
1671     ncol = ai[i+1]-ai[i];
1672     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1673     row++; array += ncol; aj += ncol;
1674   }
1675   aj = Aloc->j;
1676   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1677 
1678   /* copy over the B part */
1679   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1680   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1681   array = Bloc->a;
1682   row = A->rmap->rstart;
1683   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1684   cols_tmp = cols;
1685   for (i=0; i<mb; i++) {
1686     ncol = bi[i+1]-bi[i];
1687     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1688     row++; array += ncol; cols_tmp += ncol;
1689   }
1690   ierr = PetscFree(cols);CHKERRQ(ierr);
1691 
1692   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1695     *matout = B;
1696   } else {
1697     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1698   }
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1704 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1705 {
1706   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1707   Mat            a = aij->A,b = aij->B;
1708   PetscErrorCode ierr;
1709   PetscInt       s1,s2,s3;
1710 
1711   PetscFunctionBegin;
1712   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1713   if (rr) {
1714     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1715     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1716     /* Overlap communication with computation. */
1717     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1718   }
1719   if (ll) {
1720     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1721     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1722     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1723   }
1724   /* scale  the diagonal block */
1725   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1726 
1727   if (rr) {
1728     /* Do a scatter end and then right scale the off-diagonal block */
1729     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1730     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1731   }
1732 
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1738 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1739 {
1740   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1741   PetscErrorCode ierr;
1742 
1743   PetscFunctionBegin;
1744   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1745   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1746   PetscFunctionReturn(0);
1747 }
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1750 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1751 {
1752   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1753   PetscErrorCode ierr;
1754 
1755   PetscFunctionBegin;
1756   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatEqual_MPIAIJ"
1762 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1763 {
1764   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1765   Mat            a,b,c,d;
1766   PetscTruth     flg;
1767   PetscErrorCode ierr;
1768 
1769   PetscFunctionBegin;
1770   a = matA->A; b = matA->B;
1771   c = matB->A; d = matB->B;
1772 
1773   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1774   if (flg) {
1775     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1776   }
1777   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1778   PetscFunctionReturn(0);
1779 }
1780 
1781 #undef __FUNCT__
1782 #define __FUNCT__ "MatCopy_MPIAIJ"
1783 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1784 {
1785   PetscErrorCode ierr;
1786   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1787   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1788 
1789   PetscFunctionBegin;
1790   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1791   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1792     /* because of the column compression in the off-processor part of the matrix a->B,
1793        the number of columns in a->B and b->B may be different, hence we cannot call
1794        the MatCopy() directly on the two parts. If need be, we can provide a more
1795        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1796        then copying the submatrices */
1797     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1798   } else {
1799     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1800     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1801   }
1802   PetscFunctionReturn(0);
1803 }
1804 
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1807 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1808 {
1809   PetscErrorCode ierr;
1810 
1811   PetscFunctionBegin;
1812   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 #include "petscblaslapack.h"
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatAXPY_MPIAIJ"
1819 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1820 {
1821   PetscErrorCode ierr;
1822   PetscInt       i;
1823   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1824   PetscBLASInt   bnz,one=1;
1825   Mat_SeqAIJ     *x,*y;
1826 
1827   PetscFunctionBegin;
1828   if (str == SAME_NONZERO_PATTERN) {
1829     PetscScalar alpha = a;
1830     x = (Mat_SeqAIJ *)xx->A->data;
1831     y = (Mat_SeqAIJ *)yy->A->data;
1832     bnz = PetscBLASIntCast(x->nz);
1833     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1834     x = (Mat_SeqAIJ *)xx->B->data;
1835     y = (Mat_SeqAIJ *)yy->B->data;
1836     bnz = PetscBLASIntCast(x->nz);
1837     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1838   } else if (str == SUBSET_NONZERO_PATTERN) {
1839     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1840 
1841     x = (Mat_SeqAIJ *)xx->B->data;
1842     y = (Mat_SeqAIJ *)yy->B->data;
1843     if (y->xtoy && y->XtoY != xx->B) {
1844       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1845       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1846     }
1847     if (!y->xtoy) { /* get xtoy */
1848       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1849       y->XtoY = xx->B;
1850       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1851     }
1852     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1853   } else {
1854     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1855   }
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatConjugate_MPIAIJ"
1863 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1864 {
1865 #if defined(PETSC_USE_COMPLEX)
1866   PetscErrorCode ierr;
1867   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1868 
1869   PetscFunctionBegin;
1870   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1871   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1872 #else
1873   PetscFunctionBegin;
1874 #endif
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatRealPart_MPIAIJ"
1880 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1881 {
1882   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1883   PetscErrorCode ierr;
1884 
1885   PetscFunctionBegin;
1886   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1887   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1888   PetscFunctionReturn(0);
1889 }
1890 
1891 #undef __FUNCT__
1892 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1893 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1894 {
1895   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1896   PetscErrorCode ierr;
1897 
1898   PetscFunctionBegin;
1899   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1900   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 #ifdef PETSC_HAVE_PBGL
1905 
1906 #include <boost/parallel/mpi/bsp_process_group.hpp>
1907 #include <boost/graph/distributed/ilu_default_graph.hpp>
1908 #include <boost/graph/distributed/ilu_0_block.hpp>
1909 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1910 #include <boost/graph/distributed/petsc/interface.hpp>
1911 #include <boost/multi_array.hpp>
1912 #include <boost/parallel/distributed_property_map->hpp>
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1916 /*
1917   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1918 */
1919 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1920 {
1921   namespace petsc = boost::distributed::petsc;
1922 
1923   namespace graph_dist = boost::graph::distributed;
1924   using boost::graph::distributed::ilu_default::process_group_type;
1925   using boost::graph::ilu_permuted;
1926 
1927   PetscTruth      row_identity, col_identity;
1928   PetscContainer  c;
1929   PetscInt        m, n, M, N;
1930   PetscErrorCode  ierr;
1931 
1932   PetscFunctionBegin;
1933   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1934   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1935   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1936   if (!row_identity || !col_identity) {
1937     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1938   }
1939 
1940   process_group_type pg;
1941   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1942   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1943   lgraph_type&   level_graph = *lgraph_p;
1944   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1945 
1946   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1947   ilu_permuted(level_graph);
1948 
1949   /* put together the new matrix */
1950   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1951   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1952   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1953   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1954   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1955   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1956   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1957 
1958   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1959   ierr = PetscContainerSetPointer(c, lgraph_p);
1960   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1966 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1967 {
1968   PetscFunctionBegin;
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatSolve_MPIAIJ"
1974 /*
1975   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1976 */
1977 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1978 {
1979   namespace graph_dist = boost::graph::distributed;
1980 
1981   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1982   lgraph_type*   lgraph_p;
1983   PetscContainer c;
1984   PetscErrorCode ierr;
1985 
1986   PetscFunctionBegin;
1987   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1988   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1989   ierr = VecCopy(b, x);CHKERRQ(ierr);
1990 
1991   PetscScalar* array_x;
1992   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1993   PetscInt sx;
1994   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1995 
1996   PetscScalar* array_b;
1997   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1998   PetscInt sb;
1999   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2000 
2001   lgraph_type&   level_graph = *lgraph_p;
2002   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2003 
2004   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2005   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2006                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2007 
2008   typedef boost::iterator_property_map<array_ref_type::iterator,
2009                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2010   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2011                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2012 
2013   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2014 
2015   PetscFunctionReturn(0);
2016 }
2017 #endif
2018 
2019 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2020   PetscInt       nzlocal,nsends,nrecvs;
2021   PetscMPIInt    *send_rank;
2022   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
2023   PetscScalar    *sbuf_a,**rbuf_a;
2024   PetscErrorCode (*MatDestroy)(Mat);
2025 } Mat_Redundant;
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2029 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2030 {
2031   PetscErrorCode       ierr;
2032   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2033   PetscInt             i;
2034 
2035   PetscFunctionBegin;
2036   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2037   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2038   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2039   for (i=0; i<redund->nrecvs; i++){
2040     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2041     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2042   }
2043   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2044   ierr = PetscFree(redund);CHKERRQ(ierr);
2045   PetscFunctionReturn(0);
2046 }
2047 
2048 #undef __FUNCT__
2049 #define __FUNCT__ "MatDestroy_MatRedundant"
2050 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2051 {
2052   PetscErrorCode  ierr;
2053   PetscContainer  container;
2054   Mat_Redundant   *redund=PETSC_NULL;
2055 
2056   PetscFunctionBegin;
2057   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2058   if (container) {
2059     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2060   } else {
2061     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2062   }
2063   A->ops->destroy = redund->MatDestroy;
2064   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2065   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2066   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2072 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2073 {
2074   PetscMPIInt    rank,size;
2075   MPI_Comm       comm=((PetscObject)mat)->comm;
2076   PetscErrorCode ierr;
2077   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2078   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2079   PetscInt       *rowrange=mat->rmap->range;
2080   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2081   Mat            A=aij->A,B=aij->B,C=*matredundant;
2082   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2083   PetscScalar    *sbuf_a;
2084   PetscInt       nzlocal=a->nz+b->nz;
2085   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2086   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2087   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2088   MatScalar      *aworkA,*aworkB;
2089   PetscScalar    *vals;
2090   PetscMPIInt    tag1,tag2,tag3,imdex;
2091   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2092                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2093   MPI_Status     recv_status,*send_status;
2094   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2095   PetscInt       **rbuf_j=PETSC_NULL;
2096   PetscScalar    **rbuf_a=PETSC_NULL;
2097   Mat_Redundant  *redund=PETSC_NULL;
2098   PetscContainer container;
2099 
2100   PetscFunctionBegin;
2101   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2102   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2103 
2104   if (reuse == MAT_REUSE_MATRIX) {
2105     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2106     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2107     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2108     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2109     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2110     if (container) {
2111       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2112     } else {
2113       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2114     }
2115     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2116 
2117     nsends    = redund->nsends;
2118     nrecvs    = redund->nrecvs;
2119     send_rank = redund->send_rank; recv_rank = send_rank + size;
2120     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2121     sbuf_j    = redund->sbuf_j;
2122     sbuf_a    = redund->sbuf_a;
2123     rbuf_j    = redund->rbuf_j;
2124     rbuf_a    = redund->rbuf_a;
2125   }
2126 
2127   if (reuse == MAT_INITIAL_MATRIX){
2128     PetscMPIInt  subrank,subsize;
2129     PetscInt     nleftover,np_subcomm;
2130     /* get the destination processors' id send_rank, nsends and nrecvs */
2131     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2132     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2133     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
2134     recv_rank = send_rank + size;
2135     np_subcomm = size/nsubcomm;
2136     nleftover  = size - nsubcomm*np_subcomm;
2137     nsends = 0; nrecvs = 0;
2138     for (i=0; i<size; i++){ /* i=rank*/
2139       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2140         send_rank[nsends] = i; nsends++;
2141         recv_rank[nrecvs++] = i;
2142       }
2143     }
2144     if (rank >= size - nleftover){/* this proc is a leftover processor */
2145       i = size-nleftover-1;
2146       j = 0;
2147       while (j < nsubcomm - nleftover){
2148         send_rank[nsends++] = i;
2149         i--; j++;
2150       }
2151     }
2152 
2153     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2154       for (i=0; i<nleftover; i++){
2155         recv_rank[nrecvs++] = size-nleftover+i;
2156       }
2157     }
2158 
2159     /* allocate sbuf_j, sbuf_a */
2160     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2161     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2162     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2163   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2164 
2165   /* copy mat's local entries into the buffers */
2166   if (reuse == MAT_INITIAL_MATRIX){
2167     rownz_max = 0;
2168     rptr = sbuf_j;
2169     cols = sbuf_j + rend-rstart + 1;
2170     vals = sbuf_a;
2171     rptr[0] = 0;
2172     for (i=0; i<rend-rstart; i++){
2173       row = i + rstart;
2174       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2175       ncols  = nzA + nzB;
2176       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2177       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2178       /* load the column indices for this row into cols */
2179       lwrite = 0;
2180       for (l=0; l<nzB; l++) {
2181         if ((ctmp = bmap[cworkB[l]]) < cstart){
2182           vals[lwrite]   = aworkB[l];
2183           cols[lwrite++] = ctmp;
2184         }
2185       }
2186       for (l=0; l<nzA; l++){
2187         vals[lwrite]   = aworkA[l];
2188         cols[lwrite++] = cstart + cworkA[l];
2189       }
2190       for (l=0; l<nzB; l++) {
2191         if ((ctmp = bmap[cworkB[l]]) >= cend){
2192           vals[lwrite]   = aworkB[l];
2193           cols[lwrite++] = ctmp;
2194         }
2195       }
2196       vals += ncols;
2197       cols += ncols;
2198       rptr[i+1] = rptr[i] + ncols;
2199       if (rownz_max < ncols) rownz_max = ncols;
2200     }
2201     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2202   } else { /* only copy matrix values into sbuf_a */
2203     rptr = sbuf_j;
2204     vals = sbuf_a;
2205     rptr[0] = 0;
2206     for (i=0; i<rend-rstart; i++){
2207       row = i + rstart;
2208       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2209       ncols  = nzA + nzB;
2210       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2211       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2212       lwrite = 0;
2213       for (l=0; l<nzB; l++) {
2214         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2215       }
2216       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2217       for (l=0; l<nzB; l++) {
2218         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2219       }
2220       vals += ncols;
2221       rptr[i+1] = rptr[i] + ncols;
2222     }
2223   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2224 
2225   /* send nzlocal to others, and recv other's nzlocal */
2226   /*--------------------------------------------------*/
2227   if (reuse == MAT_INITIAL_MATRIX){
2228     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2229     s_waits2 = s_waits3 + nsends;
2230     s_waits1 = s_waits2 + nsends;
2231     r_waits1 = s_waits1 + nsends;
2232     r_waits2 = r_waits1 + nrecvs;
2233     r_waits3 = r_waits2 + nrecvs;
2234   } else {
2235     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2236     r_waits3 = s_waits3 + nsends;
2237   }
2238 
2239   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2240   if (reuse == MAT_INITIAL_MATRIX){
2241     /* get new tags to keep the communication clean */
2242     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2243     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2244     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2245     rbuf_nz = sbuf_nz + nsends;
2246 
2247     /* post receives of other's nzlocal */
2248     for (i=0; i<nrecvs; i++){
2249       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2250     }
2251     /* send nzlocal to others */
2252     for (i=0; i<nsends; i++){
2253       sbuf_nz[i] = nzlocal;
2254       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2255     }
2256     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2257     count = nrecvs;
2258     while (count) {
2259       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2260       recv_rank[imdex] = recv_status.MPI_SOURCE;
2261       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2262       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2263 
2264       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2265       rbuf_nz[imdex] += i + 2;
2266       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2267       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2268       count--;
2269     }
2270     /* wait on sends of nzlocal */
2271     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2272     /* send mat->i,j to others, and recv from other's */
2273     /*------------------------------------------------*/
2274     for (i=0; i<nsends; i++){
2275       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2276       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2277     }
2278     /* wait on receives of mat->i,j */
2279     /*------------------------------*/
2280     count = nrecvs;
2281     while (count) {
2282       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2283       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2284       count--;
2285     }
2286     /* wait on sends of mat->i,j */
2287     /*---------------------------*/
2288     if (nsends) {
2289       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2290     }
2291   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2292 
2293   /* post receives, send and receive mat->a */
2294   /*----------------------------------------*/
2295   for (imdex=0; imdex<nrecvs; imdex++) {
2296     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2297   }
2298   for (i=0; i<nsends; i++){
2299     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2300   }
2301   count = nrecvs;
2302   while (count) {
2303     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2304     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2305     count--;
2306   }
2307   if (nsends) {
2308     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2309   }
2310 
2311   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2312 
2313   /* create redundant matrix */
2314   /*-------------------------*/
2315   if (reuse == MAT_INITIAL_MATRIX){
2316     /* compute rownz_max for preallocation */
2317     for (imdex=0; imdex<nrecvs; imdex++){
2318       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2319       rptr = rbuf_j[imdex];
2320       for (i=0; i<j; i++){
2321         ncols = rptr[i+1] - rptr[i];
2322         if (rownz_max < ncols) rownz_max = ncols;
2323       }
2324     }
2325 
2326     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2327     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2328     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2329     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2330     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2331   } else {
2332     C = *matredundant;
2333   }
2334 
2335   /* insert local matrix entries */
2336   rptr = sbuf_j;
2337   cols = sbuf_j + rend-rstart + 1;
2338   vals = sbuf_a;
2339   for (i=0; i<rend-rstart; i++){
2340     row   = i + rstart;
2341     ncols = rptr[i+1] - rptr[i];
2342     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2343     vals += ncols;
2344     cols += ncols;
2345   }
2346   /* insert received matrix entries */
2347   for (imdex=0; imdex<nrecvs; imdex++){
2348     rstart = rowrange[recv_rank[imdex]];
2349     rend   = rowrange[recv_rank[imdex]+1];
2350     rptr = rbuf_j[imdex];
2351     cols = rbuf_j[imdex] + rend-rstart + 1;
2352     vals = rbuf_a[imdex];
2353     for (i=0; i<rend-rstart; i++){
2354       row   = i + rstart;
2355       ncols = rptr[i+1] - rptr[i];
2356       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2357       vals += ncols;
2358       cols += ncols;
2359     }
2360   }
2361   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2362   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2363   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2364   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2365   if (reuse == MAT_INITIAL_MATRIX){
2366     PetscContainer container;
2367     *matredundant = C;
2368     /* create a supporting struct and attach it to C for reuse */
2369     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2370     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2371     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2372     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2373     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2374 
2375     redund->nzlocal = nzlocal;
2376     redund->nsends  = nsends;
2377     redund->nrecvs  = nrecvs;
2378     redund->send_rank = send_rank;
2379     redund->sbuf_nz = sbuf_nz;
2380     redund->sbuf_j  = sbuf_j;
2381     redund->sbuf_a  = sbuf_a;
2382     redund->rbuf_j  = rbuf_j;
2383     redund->rbuf_a  = rbuf_a;
2384 
2385     redund->MatDestroy = C->ops->destroy;
2386     C->ops->destroy    = MatDestroy_MatRedundant;
2387   }
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2393 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2394 {
2395   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2396   PetscErrorCode ierr;
2397   PetscInt       i,*idxb = 0;
2398   PetscScalar    *va,*vb;
2399   Vec            vtmp;
2400 
2401   PetscFunctionBegin;
2402   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2403   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2404   if (idx) {
2405     for (i=0; i<A->rmap->n; i++) {
2406       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2407     }
2408   }
2409 
2410   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2411   if (idx) {
2412     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2413   }
2414   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2415   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2416 
2417   for (i=0; i<A->rmap->n; i++){
2418     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2419       va[i] = vb[i];
2420       if (idx) idx[i] = a->garray[idxb[i]];
2421     }
2422   }
2423 
2424   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2425   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2426   if (idxb) {
2427     ierr = PetscFree(idxb);CHKERRQ(ierr);
2428   }
2429   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 #undef __FUNCT__
2434 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2435 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2436 {
2437   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2438   PetscErrorCode ierr;
2439   PetscInt       i,*idxb = 0;
2440   PetscScalar    *va,*vb;
2441   Vec            vtmp;
2442 
2443   PetscFunctionBegin;
2444   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2445   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2446   if (idx) {
2447     for (i=0; i<A->cmap->n; i++) {
2448       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2449     }
2450   }
2451 
2452   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2453   if (idx) {
2454     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2455   }
2456   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2457   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2458 
2459   for (i=0; i<A->rmap->n; i++){
2460     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2461       va[i] = vb[i];
2462       if (idx) idx[i] = a->garray[idxb[i]];
2463     }
2464   }
2465 
2466   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2467   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2468   if (idxb) {
2469     ierr = PetscFree(idxb);CHKERRQ(ierr);
2470   }
2471   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 #undef __FUNCT__
2476 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2477 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2478 {
2479   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2480   PetscInt       n      = A->rmap->n;
2481   PetscInt       cstart = A->cmap->rstart;
2482   PetscInt      *cmap   = mat->garray;
2483   PetscInt      *diagIdx, *offdiagIdx;
2484   Vec            diagV, offdiagV;
2485   PetscScalar   *a, *diagA, *offdiagA;
2486   PetscInt       r;
2487   PetscErrorCode ierr;
2488 
2489   PetscFunctionBegin;
2490   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2491   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2492   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2493   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2494   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2495   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2496   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2497   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2498   for(r = 0; r < n; ++r) {
2499     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2500       a[r]   = diagA[r];
2501       idx[r] = cstart + diagIdx[r];
2502     } else {
2503       a[r]   = offdiagA[r];
2504       idx[r] = cmap[offdiagIdx[r]];
2505     }
2506   }
2507   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2508   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2509   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2510   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2511   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2512   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 #undef __FUNCT__
2517 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2518 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2519 {
2520   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2521   PetscInt       n      = A->rmap->n;
2522   PetscInt       cstart = A->cmap->rstart;
2523   PetscInt      *cmap   = mat->garray;
2524   PetscInt      *diagIdx, *offdiagIdx;
2525   Vec            diagV, offdiagV;
2526   PetscScalar   *a, *diagA, *offdiagA;
2527   PetscInt       r;
2528   PetscErrorCode ierr;
2529 
2530   PetscFunctionBegin;
2531   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2532   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2533   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2534   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2535   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2536   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2537   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2538   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2539   for(r = 0; r < n; ++r) {
2540     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2541       a[r]   = diagA[r];
2542       idx[r] = cstart + diagIdx[r];
2543     } else {
2544       a[r]   = offdiagA[r];
2545       idx[r] = cmap[offdiagIdx[r]];
2546     }
2547   }
2548   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2549   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2550   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2551   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2552   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2553   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2559 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2560 {
2561   PetscErrorCode ierr;
2562 
2563   PetscFunctionBegin;
2564   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 /* -------------------------------------------------------------------*/
2569 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2570        MatGetRow_MPIAIJ,
2571        MatRestoreRow_MPIAIJ,
2572        MatMult_MPIAIJ,
2573 /* 4*/ MatMultAdd_MPIAIJ,
2574        MatMultTranspose_MPIAIJ,
2575        MatMultTransposeAdd_MPIAIJ,
2576 #ifdef PETSC_HAVE_PBGL
2577        MatSolve_MPIAIJ,
2578 #else
2579        0,
2580 #endif
2581        0,
2582        0,
2583 /*10*/ 0,
2584        0,
2585        0,
2586        MatRelax_MPIAIJ,
2587        MatTranspose_MPIAIJ,
2588 /*15*/ MatGetInfo_MPIAIJ,
2589        MatEqual_MPIAIJ,
2590        MatGetDiagonal_MPIAIJ,
2591        MatDiagonalScale_MPIAIJ,
2592        MatNorm_MPIAIJ,
2593 /*20*/ MatAssemblyBegin_MPIAIJ,
2594        MatAssemblyEnd_MPIAIJ,
2595        MatSetOption_MPIAIJ,
2596        MatZeroEntries_MPIAIJ,
2597 /*24*/ MatZeroRows_MPIAIJ,
2598        0,
2599 #ifdef PETSC_HAVE_PBGL
2600        0,
2601 #else
2602        0,
2603 #endif
2604        0,
2605        0,
2606 /*29*/ MatSetUpPreallocation_MPIAIJ,
2607 #ifdef PETSC_HAVE_PBGL
2608        0,
2609 #else
2610        0,
2611 #endif
2612        0,
2613        0,
2614        0,
2615 /*34*/ MatDuplicate_MPIAIJ,
2616        0,
2617        0,
2618        0,
2619        0,
2620 /*39*/ MatAXPY_MPIAIJ,
2621        MatGetSubMatrices_MPIAIJ,
2622        MatIncreaseOverlap_MPIAIJ,
2623        MatGetValues_MPIAIJ,
2624        MatCopy_MPIAIJ,
2625 /*44*/ MatGetRowMax_MPIAIJ,
2626        MatScale_MPIAIJ,
2627        0,
2628        0,
2629        0,
2630 /*49*/ MatSetBlockSize_MPIAIJ,
2631        0,
2632        0,
2633        0,
2634        0,
2635 /*54*/ MatFDColoringCreate_MPIAIJ,
2636        0,
2637        MatSetUnfactored_MPIAIJ,
2638        MatPermute_MPIAIJ,
2639        0,
2640 /*59*/ MatGetSubMatrix_MPIAIJ,
2641        MatDestroy_MPIAIJ,
2642        MatView_MPIAIJ,
2643        0,
2644        0,
2645 /*64*/ 0,
2646        0,
2647        0,
2648        0,
2649        0,
2650 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2651        MatGetRowMinAbs_MPIAIJ,
2652        0,
2653        MatSetColoring_MPIAIJ,
2654 #if defined(PETSC_HAVE_ADIC)
2655        MatSetValuesAdic_MPIAIJ,
2656 #else
2657        0,
2658 #endif
2659        MatSetValuesAdifor_MPIAIJ,
2660 /*75*/ 0,
2661        0,
2662        0,
2663        0,
2664        0,
2665 /*80*/ 0,
2666        0,
2667        0,
2668 /*83*/ MatLoad_MPIAIJ,
2669        0,
2670        0,
2671        0,
2672        0,
2673        0,
2674 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2675        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2676        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2677        MatPtAP_Basic,
2678        MatPtAPSymbolic_MPIAIJ,
2679 /*94*/ MatPtAPNumeric_MPIAIJ,
2680        0,
2681        0,
2682        0,
2683        0,
2684 /*99*/ 0,
2685        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2686        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2687        MatConjugate_MPIAIJ,
2688        0,
2689 /*104*/MatSetValuesRow_MPIAIJ,
2690        MatRealPart_MPIAIJ,
2691        MatImaginaryPart_MPIAIJ,
2692        0,
2693        0,
2694 /*109*/0,
2695        MatGetRedundantMatrix_MPIAIJ,
2696        MatGetRowMin_MPIAIJ,
2697        0,
2698        0,
2699 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2700        0,
2701        0,
2702        0,
2703        0,
2704        0
2705 };
2706 
2707 /* ----------------------------------------------------------------------------------------*/
2708 
2709 EXTERN_C_BEGIN
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2712 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2713 {
2714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2715   PetscErrorCode ierr;
2716 
2717   PetscFunctionBegin;
2718   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2719   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2720   PetscFunctionReturn(0);
2721 }
2722 EXTERN_C_END
2723 
2724 EXTERN_C_BEGIN
2725 #undef __FUNCT__
2726 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2727 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2728 {
2729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2730   PetscErrorCode ierr;
2731 
2732   PetscFunctionBegin;
2733   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2734   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2735   PetscFunctionReturn(0);
2736 }
2737 EXTERN_C_END
2738 
2739 #include "petscpc.h"
2740 EXTERN_C_BEGIN
2741 #undef __FUNCT__
2742 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2743 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2744 {
2745   Mat_MPIAIJ     *b;
2746   PetscErrorCode ierr;
2747   PetscInt       i;
2748 
2749   PetscFunctionBegin;
2750   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2751   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2752   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2753   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2754 
2755   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2756   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2757   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2758   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2759   if (d_nnz) {
2760     for (i=0; i<B->rmap->n; i++) {
2761       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2762     }
2763   }
2764   if (o_nnz) {
2765     for (i=0; i<B->rmap->n; i++) {
2766       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2767     }
2768   }
2769   b = (Mat_MPIAIJ*)B->data;
2770 
2771   if (!B->preallocated) {
2772     /* Explicitly create 2 MATSEQAIJ matrices. */
2773     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2774     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2775     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2776     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2777     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2778     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2779     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2780     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2781   }
2782 
2783   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2784   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2785   B->preallocated = PETSC_TRUE;
2786   PetscFunctionReturn(0);
2787 }
2788 EXTERN_C_END
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2792 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2793 {
2794   Mat            mat;
2795   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2796   PetscErrorCode ierr;
2797 
2798   PetscFunctionBegin;
2799   *newmat       = 0;
2800   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2801   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2802   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2803   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2804   a    = (Mat_MPIAIJ*)mat->data;
2805 
2806   mat->factor       = matin->factor;
2807   mat->rmap->bs      = matin->rmap->bs;
2808   mat->assembled    = PETSC_TRUE;
2809   mat->insertmode   = NOT_SET_VALUES;
2810   mat->preallocated = PETSC_TRUE;
2811 
2812   a->size           = oldmat->size;
2813   a->rank           = oldmat->rank;
2814   a->donotstash     = oldmat->donotstash;
2815   a->roworiented    = oldmat->roworiented;
2816   a->rowindices     = 0;
2817   a->rowvalues      = 0;
2818   a->getrowactive   = PETSC_FALSE;
2819 
2820   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2821   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2822 
2823   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2824   if (oldmat->colmap) {
2825 #if defined (PETSC_USE_CTABLE)
2826     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2827 #else
2828     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2829     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2830     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2831 #endif
2832   } else a->colmap = 0;
2833   if (oldmat->garray) {
2834     PetscInt len;
2835     len  = oldmat->B->cmap->n;
2836     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2837     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2838     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2839   } else a->garray = 0;
2840 
2841   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2842   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2843   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2844   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2845   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2846   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2847   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2848   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2849   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2850   *newmat = mat;
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 #include "petscsys.h"
2855 
2856 #undef __FUNCT__
2857 #define __FUNCT__ "MatLoad_MPIAIJ"
2858 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2859 {
2860   Mat            A;
2861   PetscScalar    *vals,*svals;
2862   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2863   MPI_Status     status;
2864   PetscErrorCode ierr;
2865   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2866   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2867   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2868   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2869   PetscInt       cend,cstart,n,*rowners;
2870   int            fd;
2871 
2872   PetscFunctionBegin;
2873   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2874   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2875   if (!rank) {
2876     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2877     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2878     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2879   }
2880 
2881   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2882   M = header[1]; N = header[2];
2883   /* determine ownership of all rows */
2884   m    = M/size + ((M % size) > rank);
2885   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2886   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2887 
2888   /* First process needs enough room for process with most rows */
2889   if (!rank) {
2890     mmax       = rowners[1];
2891     for (i=2; i<size; i++) {
2892       mmax = PetscMax(mmax,rowners[i]);
2893     }
2894   } else mmax = m;
2895 
2896   rowners[0] = 0;
2897   for (i=2; i<=size; i++) {
2898     rowners[i] += rowners[i-1];
2899   }
2900   rstart = rowners[rank];
2901   rend   = rowners[rank+1];
2902 
2903   /* distribute row lengths to all processors */
2904   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2905   if (!rank) {
2906     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2907     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2908     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2909     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2910     for (j=0; j<m; j++) {
2911       procsnz[0] += ourlens[j];
2912     }
2913     for (i=1; i<size; i++) {
2914       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2915       /* calculate the number of nonzeros on each processor */
2916       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2917         procsnz[i] += rowlengths[j];
2918       }
2919       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2920       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2921     }
2922     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2923   } else {
2924     mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr);
2925     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2926   }
2927 
2928   if (!rank) {
2929     /* determine max buffer needed and allocate it */
2930     maxnz = 0;
2931     for (i=0; i<size; i++) {
2932       maxnz = PetscMax(maxnz,procsnz[i]);
2933     }
2934     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2935 
2936     /* read in my part of the matrix column indices  */
2937     nz   = procsnz[0];
2938     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2939     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2940 
2941     /* read in every one elses and ship off */
2942     for (i=1; i<size; i++) {
2943       nz     = procsnz[i];
2944       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2945       mpicnt = PetscMPIIntCast(nz);
2946       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2947     }
2948     ierr = PetscFree(cols);CHKERRQ(ierr);
2949   } else {
2950     /* determine buffer space needed for message */
2951     nz = 0;
2952     for (i=0; i<m; i++) {
2953       nz += ourlens[i];
2954     }
2955     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2956 
2957     /* receive message of column indices*/
2958     mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr);
2959     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2960     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2961     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2962     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2963     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2964   }
2965 
2966   /* determine column ownership if matrix is not square */
2967   if (N != M) {
2968     n      = N/size + ((N % size) > rank);
2969     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2970     cstart = cend - n;
2971   } else {
2972     cstart = rstart;
2973     cend   = rend;
2974     n      = cend - cstart;
2975   }
2976 
2977   /* loop over local rows, determining number of off diagonal entries */
2978   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2979   jj = 0;
2980   for (i=0; i<m; i++) {
2981     for (j=0; j<ourlens[i]; j++) {
2982       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2983       jj++;
2984     }
2985   }
2986 
2987   /* create our matrix */
2988   for (i=0; i<m; i++) {
2989     ourlens[i] -= offlens[i];
2990   }
2991   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2992   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2993   ierr = MatSetType(A,type);CHKERRQ(ierr);
2994   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2995 
2996   for (i=0; i<m; i++) {
2997     ourlens[i] += offlens[i];
2998   }
2999 
3000   if (!rank) {
3001     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3002 
3003     /* read in my part of the matrix numerical values  */
3004     nz   = procsnz[0];
3005     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3006 
3007     /* insert into matrix */
3008     jj      = rstart;
3009     smycols = mycols;
3010     svals   = vals;
3011     for (i=0; i<m; i++) {
3012       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3013       smycols += ourlens[i];
3014       svals   += ourlens[i];
3015       jj++;
3016     }
3017 
3018     /* read in other processors and ship out */
3019     for (i=1; i<size; i++) {
3020       nz     = procsnz[i];
3021       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3022       mpicnt = PetscMPIIntCast(nz);
3023       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3024     }
3025     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3026   } else {
3027     /* receive numeric values */
3028     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3029 
3030     /* receive message of values*/
3031     mpicnt = PetscMPIIntCast(nz);
3032     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3033     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3034     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3035     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3036     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3037 
3038     /* insert into matrix */
3039     jj      = rstart;
3040     smycols = mycols;
3041     svals   = vals;
3042     for (i=0; i<m; i++) {
3043       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3044       smycols += ourlens[i];
3045       svals   += ourlens[i];
3046       jj++;
3047     }
3048   }
3049   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3050   ierr = PetscFree(vals);CHKERRQ(ierr);
3051   ierr = PetscFree(mycols);CHKERRQ(ierr);
3052   ierr = PetscFree(rowners);CHKERRQ(ierr);
3053 
3054   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3055   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3056   *newmat = A;
3057   PetscFunctionReturn(0);
3058 }
3059 
3060 #undef __FUNCT__
3061 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3062 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3063 {
3064   PetscErrorCode ierr;
3065   IS             iscol_local;
3066   PetscInt       csize;
3067 
3068   PetscFunctionBegin;
3069   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3070   if (call == MAT_REUSE_MATRIX) {
3071     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3072     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3073   } else {
3074     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3075   }
3076   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3077   if (call == MAT_INITIAL_MATRIX) {
3078     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3079     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3080   }
3081   PetscFunctionReturn(0);
3082 }
3083 
3084 #undef __FUNCT__
3085 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3086 /*
3087     Not great since it makes two copies of the submatrix, first an SeqAIJ
3088   in local and then by concatenating the local matrices the end result.
3089   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3090 
3091   Note: This requires a sequential iscol with all indices.
3092 */
3093 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3094 {
3095   PetscErrorCode ierr;
3096   PetscMPIInt    rank,size;
3097   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3098   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3099   Mat            *local,M,Mreuse;
3100   MatScalar      *vwork,*aa;
3101   MPI_Comm       comm = ((PetscObject)mat)->comm;
3102   Mat_SeqAIJ     *aij;
3103 
3104 
3105   PetscFunctionBegin;
3106   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3107   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3108 
3109   if (call ==  MAT_REUSE_MATRIX) {
3110     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3111     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3112     local = &Mreuse;
3113     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3114   } else {
3115     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3116     Mreuse = *local;
3117     ierr   = PetscFree(local);CHKERRQ(ierr);
3118   }
3119 
3120   /*
3121       m - number of local rows
3122       n - number of columns (same on all processors)
3123       rstart - first row in new global matrix generated
3124   */
3125   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3126   if (call == MAT_INITIAL_MATRIX) {
3127     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3128     ii  = aij->i;
3129     jj  = aij->j;
3130 
3131     /*
3132         Determine the number of non-zeros in the diagonal and off-diagonal
3133         portions of the matrix in order to do correct preallocation
3134     */
3135 
3136     /* first get start and end of "diagonal" columns */
3137     if (csize == PETSC_DECIDE) {
3138       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3139       if (mglobal == n) { /* square matrix */
3140 	nlocal = m;
3141       } else {
3142         nlocal = n/size + ((n % size) > rank);
3143       }
3144     } else {
3145       nlocal = csize;
3146     }
3147     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3148     rstart = rend - nlocal;
3149     if (rank == size - 1 && rend != n) {
3150       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3151     }
3152 
3153     /* next, compute all the lengths */
3154     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3155     olens = dlens + m;
3156     for (i=0; i<m; i++) {
3157       jend = ii[i+1] - ii[i];
3158       olen = 0;
3159       dlen = 0;
3160       for (j=0; j<jend; j++) {
3161         if (*jj < rstart || *jj >= rend) olen++;
3162         else dlen++;
3163         jj++;
3164       }
3165       olens[i] = olen;
3166       dlens[i] = dlen;
3167     }
3168     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3169     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3170     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3171     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3172     ierr = PetscFree(dlens);CHKERRQ(ierr);
3173   } else {
3174     PetscInt ml,nl;
3175 
3176     M = *newmat;
3177     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3178     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3179     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3180     /*
3181          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3182        rather than the slower MatSetValues().
3183     */
3184     M->was_assembled = PETSC_TRUE;
3185     M->assembled     = PETSC_FALSE;
3186   }
3187   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3188   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3189   ii  = aij->i;
3190   jj  = aij->j;
3191   aa  = aij->a;
3192   for (i=0; i<m; i++) {
3193     row   = rstart + i;
3194     nz    = ii[i+1] - ii[i];
3195     cwork = jj;     jj += nz;
3196     vwork = aa;     aa += nz;
3197     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3198   }
3199 
3200   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3201   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3202   *newmat = M;
3203 
3204   /* save submatrix used in processor for next request */
3205   if (call ==  MAT_INITIAL_MATRIX) {
3206     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3207     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3208   }
3209 
3210   PetscFunctionReturn(0);
3211 }
3212 
3213 EXTERN_C_BEGIN
3214 #undef __FUNCT__
3215 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3216 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3217 {
3218   PetscInt       m,cstart, cend,j,nnz,i,d;
3219   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3220   const PetscInt *JJ;
3221   PetscScalar    *values;
3222   PetscErrorCode ierr;
3223 
3224   PetscFunctionBegin;
3225   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3226 
3227   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3228   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3229   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3230   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3231   m      = B->rmap->n;
3232   cstart = B->cmap->rstart;
3233   cend   = B->cmap->rend;
3234   rstart = B->rmap->rstart;
3235 
3236   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3237   o_nnz = d_nnz + m;
3238 
3239 #if defined(PETSC_USE_DEBUGGING)
3240   for (i=0; i<m; i++) {
3241     nnz     = Ii[i+1]- Ii[i];
3242     JJ      = J + Ii[i];
3243     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3244     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3245     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3246     for (j=1; j<nnz; j++) {
3247       if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3248     }
3249   }
3250 #endif
3251 
3252   for (i=0; i<m; i++) {
3253     nnz     = Ii[i+1]- Ii[i];
3254     JJ      = J + Ii[i];
3255     nnz_max = PetscMax(nnz_max,nnz);
3256     for (j=0; j<nnz; j++) {
3257       if (*JJ >= cstart) break;
3258       JJ++;
3259     }
3260     d = 0;
3261     for (; j<nnz; j++) {
3262       if (*JJ++ >= cend) break;
3263       d++;
3264     }
3265     d_nnz[i] = d;
3266     o_nnz[i] = nnz - d;
3267   }
3268   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3269   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3270 
3271   if (v) values = (PetscScalar*)v;
3272   else {
3273     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3274     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3275   }
3276 
3277   for (i=0; i<m; i++) {
3278     ii   = i + rstart;
3279     nnz  = Ii[i+1]- Ii[i];
3280     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3281   }
3282   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3283   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3284 
3285   if (!v) {
3286     ierr = PetscFree(values);CHKERRQ(ierr);
3287   }
3288   PetscFunctionReturn(0);
3289 }
3290 EXTERN_C_END
3291 
3292 #undef __FUNCT__
3293 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3294 /*@
3295    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3296    (the default parallel PETSc format).
3297 
3298    Collective on MPI_Comm
3299 
3300    Input Parameters:
3301 +  B - the matrix
3302 .  i - the indices into j for the start of each local row (starts with zero)
3303 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3304 -  v - optional values in the matrix
3305 
3306    Level: developer
3307 
3308    Notes:
3309        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3310      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3311      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3312 
3313        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3314 
3315        The format which is used for the sparse matrix input, is equivalent to a
3316     row-major ordering.. i.e for the following matrix, the input data expected is
3317     as shown:
3318 
3319         1 0 0
3320         2 0 3     P0
3321        -------
3322         4 5 6     P1
3323 
3324      Process0 [P0]: rows_owned=[0,1]
3325         i =  {0,1,3}  [size = nrow+1  = 2+1]
3326         j =  {0,0,2}  [size = nz = 6]
3327         v =  {1,2,3}  [size = nz = 6]
3328 
3329      Process1 [P1]: rows_owned=[2]
3330         i =  {0,3}    [size = nrow+1  = 1+1]
3331         j =  {0,1,2}  [size = nz = 6]
3332         v =  {4,5,6}  [size = nz = 6]
3333 
3334       The column indices for each row MUST be sorted.
3335 
3336 .keywords: matrix, aij, compressed row, sparse, parallel
3337 
3338 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3339           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3340 @*/
3341 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3342 {
3343   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3344 
3345   PetscFunctionBegin;
3346   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3347   if (f) {
3348     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3349   }
3350   PetscFunctionReturn(0);
3351 }
3352 
3353 #undef __FUNCT__
3354 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3355 /*@C
3356    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3357    (the default parallel PETSc format).  For good matrix assembly performance
3358    the user should preallocate the matrix storage by setting the parameters
3359    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3360    performance can be increased by more than a factor of 50.
3361 
3362    Collective on MPI_Comm
3363 
3364    Input Parameters:
3365 +  A - the matrix
3366 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3367            (same value is used for all local rows)
3368 .  d_nnz - array containing the number of nonzeros in the various rows of the
3369            DIAGONAL portion of the local submatrix (possibly different for each row)
3370            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3371            The size of this array is equal to the number of local rows, i.e 'm'.
3372            You must leave room for the diagonal entry even if it is zero.
3373 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3374            submatrix (same value is used for all local rows).
3375 -  o_nnz - array containing the number of nonzeros in the various rows of the
3376            OFF-DIAGONAL portion of the local submatrix (possibly different for
3377            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3378            structure. The size of this array is equal to the number
3379            of local rows, i.e 'm'.
3380 
3381    If the *_nnz parameter is given then the *_nz parameter is ignored
3382 
3383    The AIJ format (also called the Yale sparse matrix format or
3384    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3385    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3386 
3387    The parallel matrix is partitioned such that the first m0 rows belong to
3388    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3389    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3390 
3391    The DIAGONAL portion of the local submatrix of a processor can be defined
3392    as the submatrix which is obtained by extraction the part corresponding
3393    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3394    first row that belongs to the processor, and r2 is the last row belonging
3395    to the this processor. This is a square mxm matrix. The remaining portion
3396    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3397 
3398    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3399 
3400    You can call MatGetInfo() to get information on how effective the preallocation was;
3401    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3402    You can also run with the option -info and look for messages with the string
3403    malloc in them to see if additional memory allocation was needed.
3404 
3405    Example usage:
3406 
3407    Consider the following 8x8 matrix with 34 non-zero values, that is
3408    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3409    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3410    as follows:
3411 
3412 .vb
3413             1  2  0  |  0  3  0  |  0  4
3414     Proc0   0  5  6  |  7  0  0  |  8  0
3415             9  0 10  | 11  0  0  | 12  0
3416     -------------------------------------
3417            13  0 14  | 15 16 17  |  0  0
3418     Proc1   0 18  0  | 19 20 21  |  0  0
3419             0  0  0  | 22 23  0  | 24  0
3420     -------------------------------------
3421     Proc2  25 26 27  |  0  0 28  | 29  0
3422            30  0  0  | 31 32 33  |  0 34
3423 .ve
3424 
3425    This can be represented as a collection of submatrices as:
3426 
3427 .vb
3428       A B C
3429       D E F
3430       G H I
3431 .ve
3432 
3433    Where the submatrices A,B,C are owned by proc0, D,E,F are
3434    owned by proc1, G,H,I are owned by proc2.
3435 
3436    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3437    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3438    The 'M','N' parameters are 8,8, and have the same values on all procs.
3439 
3440    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3441    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3442    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3443    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3444    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3445    matrix, ans [DF] as another SeqAIJ matrix.
3446 
3447    When d_nz, o_nz parameters are specified, d_nz storage elements are
3448    allocated for every row of the local diagonal submatrix, and o_nz
3449    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3450    One way to choose d_nz and o_nz is to use the max nonzerors per local
3451    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3452    In this case, the values of d_nz,o_nz are:
3453 .vb
3454      proc0 : dnz = 2, o_nz = 2
3455      proc1 : dnz = 3, o_nz = 2
3456      proc2 : dnz = 1, o_nz = 4
3457 .ve
3458    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3459    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3460    for proc3. i.e we are using 12+15+10=37 storage locations to store
3461    34 values.
3462 
3463    When d_nnz, o_nnz parameters are specified, the storage is specified
3464    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3465    In the above case the values for d_nnz,o_nnz are:
3466 .vb
3467      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3468      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3469      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3470 .ve
3471    Here the space allocated is sum of all the above values i.e 34, and
3472    hence pre-allocation is perfect.
3473 
3474    Level: intermediate
3475 
3476 .keywords: matrix, aij, compressed row, sparse, parallel
3477 
3478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3479           MPIAIJ, MatGetInfo()
3480 @*/
3481 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3482 {
3483   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3484 
3485   PetscFunctionBegin;
3486   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3487   if (f) {
3488     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3489   }
3490   PetscFunctionReturn(0);
3491 }
3492 
3493 #undef __FUNCT__
3494 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3495 /*@
3496      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3497          CSR format the local rows.
3498 
3499    Collective on MPI_Comm
3500 
3501    Input Parameters:
3502 +  comm - MPI communicator
3503 .  m - number of local rows (Cannot be PETSC_DECIDE)
3504 .  n - This value should be the same as the local size used in creating the
3505        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3506        calculated if N is given) For square matrices n is almost always m.
3507 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3508 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3509 .   i - row indices
3510 .   j - column indices
3511 -   a - matrix values
3512 
3513    Output Parameter:
3514 .   mat - the matrix
3515 
3516    Level: intermediate
3517 
3518    Notes:
3519        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3520      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3521      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3522 
3523        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3524 
3525        The format which is used for the sparse matrix input, is equivalent to a
3526     row-major ordering.. i.e for the following matrix, the input data expected is
3527     as shown:
3528 
3529         1 0 0
3530         2 0 3     P0
3531        -------
3532         4 5 6     P1
3533 
3534      Process0 [P0]: rows_owned=[0,1]
3535         i =  {0,1,3}  [size = nrow+1  = 2+1]
3536         j =  {0,0,2}  [size = nz = 6]
3537         v =  {1,2,3}  [size = nz = 6]
3538 
3539      Process1 [P1]: rows_owned=[2]
3540         i =  {0,3}    [size = nrow+1  = 1+1]
3541         j =  {0,1,2}  [size = nz = 6]
3542         v =  {4,5,6}  [size = nz = 6]
3543 
3544 .keywords: matrix, aij, compressed row, sparse, parallel
3545 
3546 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3547           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3548 @*/
3549 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3550 {
3551   PetscErrorCode ierr;
3552 
3553  PetscFunctionBegin;
3554   if (i[0]) {
3555     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3556   }
3557   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3558   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3559   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3560   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3561   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3562   PetscFunctionReturn(0);
3563 }
3564 
3565 #undef __FUNCT__
3566 #define __FUNCT__ "MatCreateMPIAIJ"
3567 /*@C
3568    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3569    (the default parallel PETSc format).  For good matrix assembly performance
3570    the user should preallocate the matrix storage by setting the parameters
3571    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3572    performance can be increased by more than a factor of 50.
3573 
3574    Collective on MPI_Comm
3575 
3576    Input Parameters:
3577 +  comm - MPI communicator
3578 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3579            This value should be the same as the local size used in creating the
3580            y vector for the matrix-vector product y = Ax.
3581 .  n - This value should be the same as the local size used in creating the
3582        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3583        calculated if N is given) For square matrices n is almost always m.
3584 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3585 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3586 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3587            (same value is used for all local rows)
3588 .  d_nnz - array containing the number of nonzeros in the various rows of the
3589            DIAGONAL portion of the local submatrix (possibly different for each row)
3590            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3591            The size of this array is equal to the number of local rows, i.e 'm'.
3592            You must leave room for the diagonal entry even if it is zero.
3593 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3594            submatrix (same value is used for all local rows).
3595 -  o_nnz - array containing the number of nonzeros in the various rows of the
3596            OFF-DIAGONAL portion of the local submatrix (possibly different for
3597            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3598            structure. The size of this array is equal to the number
3599            of local rows, i.e 'm'.
3600 
3601    Output Parameter:
3602 .  A - the matrix
3603 
3604    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3605    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3606    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3607 
3608    Notes:
3609    If the *_nnz parameter is given then the *_nz parameter is ignored
3610 
3611    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3612    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3613    storage requirements for this matrix.
3614 
3615    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3616    processor than it must be used on all processors that share the object for
3617    that argument.
3618 
3619    The user MUST specify either the local or global matrix dimensions
3620    (possibly both).
3621 
3622    The parallel matrix is partitioned across processors such that the
3623    first m0 rows belong to process 0, the next m1 rows belong to
3624    process 1, the next m2 rows belong to process 2 etc.. where
3625    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3626    values corresponding to [m x N] submatrix.
3627 
3628    The columns are logically partitioned with the n0 columns belonging
3629    to 0th partition, the next n1 columns belonging to the next
3630    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3631 
3632    The DIAGONAL portion of the local submatrix on any given processor
3633    is the submatrix corresponding to the rows and columns m,n
3634    corresponding to the given processor. i.e diagonal matrix on
3635    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3636    etc. The remaining portion of the local submatrix [m x (N-n)]
3637    constitute the OFF-DIAGONAL portion. The example below better
3638    illustrates this concept.
3639 
3640    For a square global matrix we define each processor's diagonal portion
3641    to be its local rows and the corresponding columns (a square submatrix);
3642    each processor's off-diagonal portion encompasses the remainder of the
3643    local matrix (a rectangular submatrix).
3644 
3645    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3646 
3647    When calling this routine with a single process communicator, a matrix of
3648    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3649    type of communicator, use the construction mechanism:
3650      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3651 
3652    By default, this format uses inodes (identical nodes) when possible.
3653    We search for consecutive rows with the same nonzero structure, thereby
3654    reusing matrix information to achieve increased efficiency.
3655 
3656    Options Database Keys:
3657 +  -mat_no_inode  - Do not use inodes
3658 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3659 -  -mat_aij_oneindex - Internally use indexing starting at 1
3660         rather than 0.  Note that when calling MatSetValues(),
3661         the user still MUST index entries starting at 0!
3662 
3663 
3664    Example usage:
3665 
3666    Consider the following 8x8 matrix with 34 non-zero values, that is
3667    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3668    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3669    as follows:
3670 
3671 .vb
3672             1  2  0  |  0  3  0  |  0  4
3673     Proc0   0  5  6  |  7  0  0  |  8  0
3674             9  0 10  | 11  0  0  | 12  0
3675     -------------------------------------
3676            13  0 14  | 15 16 17  |  0  0
3677     Proc1   0 18  0  | 19 20 21  |  0  0
3678             0  0  0  | 22 23  0  | 24  0
3679     -------------------------------------
3680     Proc2  25 26 27  |  0  0 28  | 29  0
3681            30  0  0  | 31 32 33  |  0 34
3682 .ve
3683 
3684    This can be represented as a collection of submatrices as:
3685 
3686 .vb
3687       A B C
3688       D E F
3689       G H I
3690 .ve
3691 
3692    Where the submatrices A,B,C are owned by proc0, D,E,F are
3693    owned by proc1, G,H,I are owned by proc2.
3694 
3695    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3696    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3697    The 'M','N' parameters are 8,8, and have the same values on all procs.
3698 
3699    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3700    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3701    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3702    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3703    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3704    matrix, ans [DF] as another SeqAIJ matrix.
3705 
3706    When d_nz, o_nz parameters are specified, d_nz storage elements are
3707    allocated for every row of the local diagonal submatrix, and o_nz
3708    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3709    One way to choose d_nz and o_nz is to use the max nonzerors per local
3710    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3711    In this case, the values of d_nz,o_nz are:
3712 .vb
3713      proc0 : dnz = 2, o_nz = 2
3714      proc1 : dnz = 3, o_nz = 2
3715      proc2 : dnz = 1, o_nz = 4
3716 .ve
3717    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3718    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3719    for proc3. i.e we are using 12+15+10=37 storage locations to store
3720    34 values.
3721 
3722    When d_nnz, o_nnz parameters are specified, the storage is specified
3723    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3724    In the above case the values for d_nnz,o_nnz are:
3725 .vb
3726      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3727      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3728      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3729 .ve
3730    Here the space allocated is sum of all the above values i.e 34, and
3731    hence pre-allocation is perfect.
3732 
3733    Level: intermediate
3734 
3735 .keywords: matrix, aij, compressed row, sparse, parallel
3736 
3737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3738           MPIAIJ, MatCreateMPIAIJWithArrays()
3739 @*/
3740 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3741 {
3742   PetscErrorCode ierr;
3743   PetscMPIInt    size;
3744 
3745   PetscFunctionBegin;
3746   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3747   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3748   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3749   if (size > 1) {
3750     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3751     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3752   } else {
3753     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3754     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3755   }
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 #undef __FUNCT__
3760 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3761 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3762 {
3763   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3764 
3765   PetscFunctionBegin;
3766   *Ad     = a->A;
3767   *Ao     = a->B;
3768   *colmap = a->garray;
3769   PetscFunctionReturn(0);
3770 }
3771 
3772 #undef __FUNCT__
3773 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3774 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3775 {
3776   PetscErrorCode ierr;
3777   PetscInt       i;
3778   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3779 
3780   PetscFunctionBegin;
3781   if (coloring->ctype == IS_COLORING_GLOBAL) {
3782     ISColoringValue *allcolors,*colors;
3783     ISColoring      ocoloring;
3784 
3785     /* set coloring for diagonal portion */
3786     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3787 
3788     /* set coloring for off-diagonal portion */
3789     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3790     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3791     for (i=0; i<a->B->cmap->n; i++) {
3792       colors[i] = allcolors[a->garray[i]];
3793     }
3794     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3795     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3796     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3797     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3798   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3799     ISColoringValue *colors;
3800     PetscInt        *larray;
3801     ISColoring      ocoloring;
3802 
3803     /* set coloring for diagonal portion */
3804     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3805     for (i=0; i<a->A->cmap->n; i++) {
3806       larray[i] = i + A->cmap->rstart;
3807     }
3808     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3809     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3810     for (i=0; i<a->A->cmap->n; i++) {
3811       colors[i] = coloring->colors[larray[i]];
3812     }
3813     ierr = PetscFree(larray);CHKERRQ(ierr);
3814     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3815     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3816     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3817 
3818     /* set coloring for off-diagonal portion */
3819     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3820     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3821     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3822     for (i=0; i<a->B->cmap->n; i++) {
3823       colors[i] = coloring->colors[larray[i]];
3824     }
3825     ierr = PetscFree(larray);CHKERRQ(ierr);
3826     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3827     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3828     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3829   } else {
3830     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3831   }
3832 
3833   PetscFunctionReturn(0);
3834 }
3835 
3836 #if defined(PETSC_HAVE_ADIC)
3837 #undef __FUNCT__
3838 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3839 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3840 {
3841   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3842   PetscErrorCode ierr;
3843 
3844   PetscFunctionBegin;
3845   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3846   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3847   PetscFunctionReturn(0);
3848 }
3849 #endif
3850 
3851 #undef __FUNCT__
3852 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3853 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3854 {
3855   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3856   PetscErrorCode ierr;
3857 
3858   PetscFunctionBegin;
3859   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3860   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3861   PetscFunctionReturn(0);
3862 }
3863 
3864 #undef __FUNCT__
3865 #define __FUNCT__ "MatMerge"
3866 /*@
3867       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3868                  matrices from each processor
3869 
3870     Collective on MPI_Comm
3871 
3872    Input Parameters:
3873 +    comm - the communicators the parallel matrix will live on
3874 .    inmat - the input sequential matrices
3875 .    n - number of local columns (or PETSC_DECIDE)
3876 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3877 
3878    Output Parameter:
3879 .    outmat - the parallel matrix generated
3880 
3881     Level: advanced
3882 
3883    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3884 
3885 @*/
3886 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3887 {
3888   PetscErrorCode ierr;
3889   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3890   PetscInt       *indx;
3891   PetscScalar    *values;
3892 
3893   PetscFunctionBegin;
3894   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3895   if (scall == MAT_INITIAL_MATRIX){
3896     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3897     if (n == PETSC_DECIDE){
3898       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3899     }
3900     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3901     rstart -= m;
3902 
3903     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3904     for (i=0;i<m;i++) {
3905       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3906       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3907       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3908     }
3909     /* This routine will ONLY return MPIAIJ type matrix */
3910     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3911     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3912     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3913     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3914     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3915 
3916   } else if (scall == MAT_REUSE_MATRIX){
3917     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3918   } else {
3919     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3920   }
3921 
3922   for (i=0;i<m;i++) {
3923     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3924     Ii    = i + rstart;
3925     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3926     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3927   }
3928   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3929   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3930   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3931 
3932   PetscFunctionReturn(0);
3933 }
3934 
3935 #undef __FUNCT__
3936 #define __FUNCT__ "MatFileSplit"
3937 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3938 {
3939   PetscErrorCode    ierr;
3940   PetscMPIInt       rank;
3941   PetscInt          m,N,i,rstart,nnz;
3942   size_t            len;
3943   const PetscInt    *indx;
3944   PetscViewer       out;
3945   char              *name;
3946   Mat               B;
3947   const PetscScalar *values;
3948 
3949   PetscFunctionBegin;
3950   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3951   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3952   /* Should this be the type of the diagonal block of A? */
3953   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3954   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3955   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3956   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3957   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3958   for (i=0;i<m;i++) {
3959     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3960     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3961     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3962   }
3963   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3964   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3965 
3966   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3967   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3968   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3969   sprintf(name,"%s.%d",outfile,rank);
3970   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3971   ierr = PetscFree(name);
3972   ierr = MatView(B,out);CHKERRQ(ierr);
3973   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3974   ierr = MatDestroy(B);CHKERRQ(ierr);
3975   PetscFunctionReturn(0);
3976 }
3977 
3978 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3979 #undef __FUNCT__
3980 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3981 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3982 {
3983   PetscErrorCode       ierr;
3984   Mat_Merge_SeqsToMPI  *merge;
3985   PetscContainer       container;
3986 
3987   PetscFunctionBegin;
3988   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3989   if (container) {
3990     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3991     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
4002 
4003     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4004     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4005   }
4006   ierr = PetscFree(merge);CHKERRQ(ierr);
4007 
4008   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4009   PetscFunctionReturn(0);
4010 }
4011 
4012 #include "../src/mat/utils/freespace.h"
4013 #include "petscbt.h"
4014 
4015 #undef __FUNCT__
4016 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4017 /*@C
4018       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4019                  matrices from each processor
4020 
4021     Collective on MPI_Comm
4022 
4023    Input Parameters:
4024 +    comm - the communicators the parallel matrix will live on
4025 .    seqmat - the input sequential matrices
4026 .    m - number of local rows (or PETSC_DECIDE)
4027 .    n - number of local columns (or PETSC_DECIDE)
4028 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4029 
4030    Output Parameter:
4031 .    mpimat - the parallel matrix generated
4032 
4033     Level: advanced
4034 
4035    Notes:
4036      The dimensions of the sequential matrix in each processor MUST be the same.
4037      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4038      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4039 @*/
4040 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4041 {
4042   PetscErrorCode       ierr;
4043   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4044   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4045   PetscMPIInt          size,rank,taga,*len_s;
4046   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4047   PetscInt             proc,m;
4048   PetscInt             **buf_ri,**buf_rj;
4049   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4050   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4051   MPI_Request          *s_waits,*r_waits;
4052   MPI_Status           *status;
4053   MatScalar            *aa=a->a;
4054   MatScalar            **abuf_r,*ba_i;
4055   Mat_Merge_SeqsToMPI  *merge;
4056   PetscContainer       container;
4057 
4058   PetscFunctionBegin;
4059   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4060 
4061   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4062   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4063 
4064   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4065   if (container) {
4066     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4067   }
4068   bi     = merge->bi;
4069   bj     = merge->bj;
4070   buf_ri = merge->buf_ri;
4071   buf_rj = merge->buf_rj;
4072 
4073   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4074   owners = merge->rowmap.range;
4075   len_s  = merge->len_s;
4076 
4077   /* send and recv matrix values */
4078   /*-----------------------------*/
4079   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4080   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4081 
4082   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4083   for (proc=0,k=0; proc<size; proc++){
4084     if (!len_s[proc]) continue;
4085     i = owners[proc];
4086     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4087     k++;
4088   }
4089 
4090   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4091   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4092   ierr = PetscFree(status);CHKERRQ(ierr);
4093 
4094   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4095   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4096 
4097   /* insert mat values of mpimat */
4098   /*----------------------------*/
4099   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4100   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4101   nextrow = buf_ri_k + merge->nrecv;
4102   nextai  = nextrow + merge->nrecv;
4103 
4104   for (k=0; k<merge->nrecv; k++){
4105     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4106     nrows = *(buf_ri_k[k]);
4107     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4108     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4109   }
4110 
4111   /* set values of ba */
4112   m = merge->rowmap.n;
4113   for (i=0; i<m; i++) {
4114     arow = owners[rank] + i;
4115     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4116     bnzi = bi[i+1] - bi[i];
4117     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4118 
4119     /* add local non-zero vals of this proc's seqmat into ba */
4120     anzi = ai[arow+1] - ai[arow];
4121     aj   = a->j + ai[arow];
4122     aa   = a->a + ai[arow];
4123     nextaj = 0;
4124     for (j=0; nextaj<anzi; j++){
4125       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4126         ba_i[j] += aa[nextaj++];
4127       }
4128     }
4129 
4130     /* add received vals into ba */
4131     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4132       /* i-th row */
4133       if (i == *nextrow[k]) {
4134         anzi = *(nextai[k]+1) - *nextai[k];
4135         aj   = buf_rj[k] + *(nextai[k]);
4136         aa   = abuf_r[k] + *(nextai[k]);
4137         nextaj = 0;
4138         for (j=0; nextaj<anzi; j++){
4139           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4140             ba_i[j] += aa[nextaj++];
4141           }
4142         }
4143         nextrow[k]++; nextai[k]++;
4144       }
4145     }
4146     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4147   }
4148   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4149   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4150 
4151   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4152   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4153   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4154   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4155   PetscFunctionReturn(0);
4156 }
4157 
4158 #undef __FUNCT__
4159 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4160 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4161 {
4162   PetscErrorCode       ierr;
4163   Mat                  B_mpi;
4164   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4165   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4166   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4167   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4168   PetscInt             len,proc,*dnz,*onz;
4169   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4170   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4171   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4172   MPI_Status           *status;
4173   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4174   PetscBT              lnkbt;
4175   Mat_Merge_SeqsToMPI  *merge;
4176   PetscContainer       container;
4177 
4178   PetscFunctionBegin;
4179   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4180 
4181   /* make sure it is a PETSc comm */
4182   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4183   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4184   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4185 
4186   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4187   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4188 
4189   /* determine row ownership */
4190   /*---------------------------------------------------------*/
4191   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4192   merge->rowmap.n = m;
4193   merge->rowmap.N = M;
4194   merge->rowmap.bs = 1;
4195   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4196   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4197   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4198 
4199   m      = merge->rowmap.n;
4200   M      = merge->rowmap.N;
4201   owners = merge->rowmap.range;
4202 
4203   /* determine the number of messages to send, their lengths */
4204   /*---------------------------------------------------------*/
4205   len_s  = merge->len_s;
4206 
4207   len = 0;  /* length of buf_si[] */
4208   merge->nsend = 0;
4209   for (proc=0; proc<size; proc++){
4210     len_si[proc] = 0;
4211     if (proc == rank){
4212       len_s[proc] = 0;
4213     } else {
4214       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4215       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4216     }
4217     if (len_s[proc]) {
4218       merge->nsend++;
4219       nrows = 0;
4220       for (i=owners[proc]; i<owners[proc+1]; i++){
4221         if (ai[i+1] > ai[i]) nrows++;
4222       }
4223       len_si[proc] = 2*(nrows+1);
4224       len += len_si[proc];
4225     }
4226   }
4227 
4228   /* determine the number and length of messages to receive for ij-structure */
4229   /*-------------------------------------------------------------------------*/
4230   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4231   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4232 
4233   /* post the Irecv of j-structure */
4234   /*-------------------------------*/
4235   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4236   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4237 
4238   /* post the Isend of j-structure */
4239   /*--------------------------------*/
4240   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4241   sj_waits = si_waits + merge->nsend;
4242 
4243   for (proc=0, k=0; proc<size; proc++){
4244     if (!len_s[proc]) continue;
4245     i = owners[proc];
4246     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4247     k++;
4248   }
4249 
4250   /* receives and sends of j-structure are complete */
4251   /*------------------------------------------------*/
4252   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4253   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4254 
4255   /* send and recv i-structure */
4256   /*---------------------------*/
4257   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4258   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4259 
4260   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4261   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4262   for (proc=0,k=0; proc<size; proc++){
4263     if (!len_s[proc]) continue;
4264     /* form outgoing message for i-structure:
4265          buf_si[0]:                 nrows to be sent
4266                [1:nrows]:           row index (global)
4267                [nrows+1:2*nrows+1]: i-structure index
4268     */
4269     /*-------------------------------------------*/
4270     nrows = len_si[proc]/2 - 1;
4271     buf_si_i    = buf_si + nrows+1;
4272     buf_si[0]   = nrows;
4273     buf_si_i[0] = 0;
4274     nrows = 0;
4275     for (i=owners[proc]; i<owners[proc+1]; i++){
4276       anzi = ai[i+1] - ai[i];
4277       if (anzi) {
4278         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4279         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4280         nrows++;
4281       }
4282     }
4283     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4284     k++;
4285     buf_si += len_si[proc];
4286   }
4287 
4288   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4289   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4290 
4291   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4292   for (i=0; i<merge->nrecv; i++){
4293     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4294   }
4295 
4296   ierr = PetscFree(len_si);CHKERRQ(ierr);
4297   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4298   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4299   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4300   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4301   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4302   ierr = PetscFree(status);CHKERRQ(ierr);
4303 
4304   /* compute a local seq matrix in each processor */
4305   /*----------------------------------------------*/
4306   /* allocate bi array and free space for accumulating nonzero column info */
4307   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4308   bi[0] = 0;
4309 
4310   /* create and initialize a linked list */
4311   nlnk = N+1;
4312   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4313 
4314   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4315   len = 0;
4316   len  = ai[owners[rank+1]] - ai[owners[rank]];
4317   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4318   current_space = free_space;
4319 
4320   /* determine symbolic info for each local row */
4321   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4322   nextrow = buf_ri_k + merge->nrecv;
4323   nextai  = nextrow + merge->nrecv;
4324   for (k=0; k<merge->nrecv; k++){
4325     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4326     nrows = *buf_ri_k[k];
4327     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4328     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4329   }
4330 
4331   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4332   len = 0;
4333   for (i=0;i<m;i++) {
4334     bnzi   = 0;
4335     /* add local non-zero cols of this proc's seqmat into lnk */
4336     arow   = owners[rank] + i;
4337     anzi   = ai[arow+1] - ai[arow];
4338     aj     = a->j + ai[arow];
4339     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4340     bnzi += nlnk;
4341     /* add received col data into lnk */
4342     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4343       if (i == *nextrow[k]) { /* i-th row */
4344         anzi = *(nextai[k]+1) - *nextai[k];
4345         aj   = buf_rj[k] + *nextai[k];
4346         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4347         bnzi += nlnk;
4348         nextrow[k]++; nextai[k]++;
4349       }
4350     }
4351     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4352 
4353     /* if free space is not available, make more free space */
4354     if (current_space->local_remaining<bnzi) {
4355       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4356       nspacedouble++;
4357     }
4358     /* copy data into free space, then initialize lnk */
4359     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4360     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4361 
4362     current_space->array           += bnzi;
4363     current_space->local_used      += bnzi;
4364     current_space->local_remaining -= bnzi;
4365 
4366     bi[i+1] = bi[i] + bnzi;
4367   }
4368 
4369   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4370 
4371   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4372   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4373   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4374 
4375   /* create symbolic parallel matrix B_mpi */
4376   /*---------------------------------------*/
4377   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4378   if (n==PETSC_DECIDE) {
4379     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4380   } else {
4381     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4382   }
4383   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4384   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4385   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4386 
4387   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4388   B_mpi->assembled     = PETSC_FALSE;
4389   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4390   merge->bi            = bi;
4391   merge->bj            = bj;
4392   merge->buf_ri        = buf_ri;
4393   merge->buf_rj        = buf_rj;
4394   merge->coi           = PETSC_NULL;
4395   merge->coj           = PETSC_NULL;
4396   merge->owners_co     = PETSC_NULL;
4397 
4398   /* attach the supporting struct to B_mpi for reuse */
4399   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4400   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4401   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4402   *mpimat = B_mpi;
4403 
4404   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4405   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4406   PetscFunctionReturn(0);
4407 }
4408 
4409 #undef __FUNCT__
4410 #define __FUNCT__ "MatMerge_SeqsToMPI"
4411 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4412 {
4413   PetscErrorCode   ierr;
4414 
4415   PetscFunctionBegin;
4416   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4417   if (scall == MAT_INITIAL_MATRIX){
4418     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4419   }
4420   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4421   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4422   PetscFunctionReturn(0);
4423 }
4424 
4425 #undef __FUNCT__
4426 #define __FUNCT__ "MatGetLocalMat"
4427 /*@
4428      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4429 
4430     Not Collective
4431 
4432    Input Parameters:
4433 +    A - the matrix
4434 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4435 
4436    Output Parameter:
4437 .    A_loc - the local sequential matrix generated
4438 
4439     Level: developer
4440 
4441 @*/
4442 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4443 {
4444   PetscErrorCode  ierr;
4445   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4446   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4447   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4448   MatScalar       *aa=a->a,*ba=b->a,*cam;
4449   PetscScalar     *ca;
4450   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4451   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4452 
4453   PetscFunctionBegin;
4454   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4455   if (scall == MAT_INITIAL_MATRIX){
4456     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4457     ci[0] = 0;
4458     for (i=0; i<am; i++){
4459       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4460     }
4461     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4462     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4463     k = 0;
4464     for (i=0; i<am; i++) {
4465       ncols_o = bi[i+1] - bi[i];
4466       ncols_d = ai[i+1] - ai[i];
4467       /* off-diagonal portion of A */
4468       for (jo=0; jo<ncols_o; jo++) {
4469         col = cmap[*bj];
4470         if (col >= cstart) break;
4471         cj[k]   = col; bj++;
4472         ca[k++] = *ba++;
4473       }
4474       /* diagonal portion of A */
4475       for (j=0; j<ncols_d; j++) {
4476         cj[k]   = cstart + *aj++;
4477         ca[k++] = *aa++;
4478       }
4479       /* off-diagonal portion of A */
4480       for (j=jo; j<ncols_o; j++) {
4481         cj[k]   = cmap[*bj++];
4482         ca[k++] = *ba++;
4483       }
4484     }
4485     /* put together the new matrix */
4486     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4487     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4488     /* Since these are PETSc arrays, change flags to free them as necessary. */
4489     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4490     mat->free_a  = PETSC_TRUE;
4491     mat->free_ij = PETSC_TRUE;
4492     mat->nonew   = 0;
4493   } else if (scall == MAT_REUSE_MATRIX){
4494     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4495     ci = mat->i; cj = mat->j; cam = mat->a;
4496     for (i=0; i<am; i++) {
4497       /* off-diagonal portion of A */
4498       ncols_o = bi[i+1] - bi[i];
4499       for (jo=0; jo<ncols_o; jo++) {
4500         col = cmap[*bj];
4501         if (col >= cstart) break;
4502         *cam++ = *ba++; bj++;
4503       }
4504       /* diagonal portion of A */
4505       ncols_d = ai[i+1] - ai[i];
4506       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4507       /* off-diagonal portion of A */
4508       for (j=jo; j<ncols_o; j++) {
4509         *cam++ = *ba++; bj++;
4510       }
4511     }
4512   } else {
4513     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4514   }
4515 
4516   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4517   PetscFunctionReturn(0);
4518 }
4519 
4520 #undef __FUNCT__
4521 #define __FUNCT__ "MatGetLocalMatCondensed"
4522 /*@C
4523      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4524 
4525     Not Collective
4526 
4527    Input Parameters:
4528 +    A - the matrix
4529 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4530 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4531 
4532    Output Parameter:
4533 .    A_loc - the local sequential matrix generated
4534 
4535     Level: developer
4536 
4537 @*/
4538 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4539 {
4540   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4541   PetscErrorCode    ierr;
4542   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4543   IS                isrowa,iscola;
4544   Mat               *aloc;
4545 
4546   PetscFunctionBegin;
4547   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4548   if (!row){
4549     start = A->rmap->rstart; end = A->rmap->rend;
4550     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4551   } else {
4552     isrowa = *row;
4553   }
4554   if (!col){
4555     start = A->cmap->rstart;
4556     cmap  = a->garray;
4557     nzA   = a->A->cmap->n;
4558     nzB   = a->B->cmap->n;
4559     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4560     ncols = 0;
4561     for (i=0; i<nzB; i++) {
4562       if (cmap[i] < start) idx[ncols++] = cmap[i];
4563       else break;
4564     }
4565     imark = i;
4566     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4567     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4568     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4569     ierr = PetscFree(idx);CHKERRQ(ierr);
4570   } else {
4571     iscola = *col;
4572   }
4573   if (scall != MAT_INITIAL_MATRIX){
4574     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4575     aloc[0] = *A_loc;
4576   }
4577   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4578   *A_loc = aloc[0];
4579   ierr = PetscFree(aloc);CHKERRQ(ierr);
4580   if (!row){
4581     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4582   }
4583   if (!col){
4584     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4585   }
4586   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4587   PetscFunctionReturn(0);
4588 }
4589 
4590 #undef __FUNCT__
4591 #define __FUNCT__ "MatGetBrowsOfAcols"
4592 /*@C
4593     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4594 
4595     Collective on Mat
4596 
4597    Input Parameters:
4598 +    A,B - the matrices in mpiaij format
4599 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4600 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4601 
4602    Output Parameter:
4603 +    rowb, colb - index sets of rows and columns of B to extract
4604 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4605 -    B_seq - the sequential matrix generated
4606 
4607     Level: developer
4608 
4609 @*/
4610 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4611 {
4612   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4613   PetscErrorCode    ierr;
4614   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4615   IS                isrowb,iscolb;
4616   Mat               *bseq;
4617 
4618   PetscFunctionBegin;
4619   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4620     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4621   }
4622   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4623 
4624   if (scall == MAT_INITIAL_MATRIX){
4625     start = A->cmap->rstart;
4626     cmap  = a->garray;
4627     nzA   = a->A->cmap->n;
4628     nzB   = a->B->cmap->n;
4629     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4630     ncols = 0;
4631     for (i=0; i<nzB; i++) {  /* row < local row index */
4632       if (cmap[i] < start) idx[ncols++] = cmap[i];
4633       else break;
4634     }
4635     imark = i;
4636     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4637     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4638     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4639     ierr = PetscFree(idx);CHKERRQ(ierr);
4640     *brstart = imark;
4641     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4642   } else {
4643     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4644     isrowb = *rowb; iscolb = *colb;
4645     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4646     bseq[0] = *B_seq;
4647   }
4648   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4649   *B_seq = bseq[0];
4650   ierr = PetscFree(bseq);CHKERRQ(ierr);
4651   if (!rowb){
4652     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4653   } else {
4654     *rowb = isrowb;
4655   }
4656   if (!colb){
4657     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4658   } else {
4659     *colb = iscolb;
4660   }
4661   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4662   PetscFunctionReturn(0);
4663 }
4664 
4665 #undef __FUNCT__
4666 #define __FUNCT__ "MatGetBrowsOfAoCols"
4667 /*@C
4668     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4669     of the OFF-DIAGONAL portion of local A
4670 
4671     Collective on Mat
4672 
4673    Input Parameters:
4674 +    A,B - the matrices in mpiaij format
4675 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4676 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4677 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4678 
4679    Output Parameter:
4680 +    B_oth - the sequential matrix generated
4681 
4682     Level: developer
4683 
4684 @*/
4685 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4686 {
4687   VecScatter_MPI_General *gen_to,*gen_from;
4688   PetscErrorCode         ierr;
4689   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4690   Mat_SeqAIJ             *b_oth;
4691   VecScatter             ctx=a->Mvctx;
4692   MPI_Comm               comm=((PetscObject)ctx)->comm;
4693   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4694   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4695   PetscScalar            *rvalues,*svalues;
4696   MatScalar              *b_otha,*bufa,*bufA;
4697   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4698   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4699   MPI_Status             *sstatus,rstatus;
4700   PetscMPIInt            jj;
4701   PetscInt               *cols,sbs,rbs;
4702   PetscScalar            *vals;
4703 
4704   PetscFunctionBegin;
4705   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4706     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4707   }
4708   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4709   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4710 
4711   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4712   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4713   rvalues  = gen_from->values; /* holds the length of receiving row */
4714   svalues  = gen_to->values;   /* holds the length of sending row */
4715   nrecvs   = gen_from->n;
4716   nsends   = gen_to->n;
4717 
4718   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4719   srow     = gen_to->indices;   /* local row index to be sent */
4720   sstarts  = gen_to->starts;
4721   sprocs   = gen_to->procs;
4722   sstatus  = gen_to->sstatus;
4723   sbs      = gen_to->bs;
4724   rstarts  = gen_from->starts;
4725   rprocs   = gen_from->procs;
4726   rbs      = gen_from->bs;
4727 
4728   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4729   if (scall == MAT_INITIAL_MATRIX){
4730     /* i-array */
4731     /*---------*/
4732     /*  post receives */
4733     for (i=0; i<nrecvs; i++){
4734       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4735       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4736       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4737     }
4738 
4739     /* pack the outgoing message */
4740     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4741     rstartsj = sstartsj + nsends +1;
4742     sstartsj[0] = 0;  rstartsj[0] = 0;
4743     len = 0; /* total length of j or a array to be sent */
4744     k = 0;
4745     for (i=0; i<nsends; i++){
4746       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4747       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4748       for (j=0; j<nrows; j++) {
4749         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4750         for (l=0; l<sbs; l++){
4751           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4752           rowlen[j*sbs+l] = ncols;
4753           len += ncols;
4754           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4755         }
4756         k++;
4757       }
4758       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4759       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4760     }
4761     /* recvs and sends of i-array are completed */
4762     i = nrecvs;
4763     while (i--) {
4764       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4765     }
4766     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4767 
4768     /* allocate buffers for sending j and a arrays */
4769     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4770     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4771 
4772     /* create i-array of B_oth */
4773     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4774     b_othi[0] = 0;
4775     len = 0; /* total length of j or a array to be received */
4776     k = 0;
4777     for (i=0; i<nrecvs; i++){
4778       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4779       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4780       for (j=0; j<nrows; j++) {
4781         b_othi[k+1] = b_othi[k] + rowlen[j];
4782         len += rowlen[j]; k++;
4783       }
4784       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4785     }
4786 
4787     /* allocate space for j and a arrrays of B_oth */
4788     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4789     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4790 
4791     /* j-array */
4792     /*---------*/
4793     /*  post receives of j-array */
4794     for (i=0; i<nrecvs; i++){
4795       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4796       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4797     }
4798 
4799     /* pack the outgoing message j-array */
4800     k = 0;
4801     for (i=0; i<nsends; i++){
4802       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4803       bufJ = bufj+sstartsj[i];
4804       for (j=0; j<nrows; j++) {
4805         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4806         for (ll=0; ll<sbs; ll++){
4807           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4808           for (l=0; l<ncols; l++){
4809             *bufJ++ = cols[l];
4810           }
4811           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4812         }
4813       }
4814       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4815     }
4816 
4817     /* recvs and sends of j-array are completed */
4818     i = nrecvs;
4819     while (i--) {
4820       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4821     }
4822     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4823   } else if (scall == MAT_REUSE_MATRIX){
4824     sstartsj = *startsj;
4825     rstartsj = sstartsj + nsends +1;
4826     bufa     = *bufa_ptr;
4827     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4828     b_otha   = b_oth->a;
4829   } else {
4830     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4831   }
4832 
4833   /* a-array */
4834   /*---------*/
4835   /*  post receives of a-array */
4836   for (i=0; i<nrecvs; i++){
4837     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4838     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4839   }
4840 
4841   /* pack the outgoing message a-array */
4842   k = 0;
4843   for (i=0; i<nsends; i++){
4844     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4845     bufA = bufa+sstartsj[i];
4846     for (j=0; j<nrows; j++) {
4847       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4848       for (ll=0; ll<sbs; ll++){
4849         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4850         for (l=0; l<ncols; l++){
4851           *bufA++ = vals[l];
4852         }
4853         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4854       }
4855     }
4856     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4857   }
4858   /* recvs and sends of a-array are completed */
4859   i = nrecvs;
4860   while (i--) {
4861     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4862   }
4863   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4864   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4865 
4866   if (scall == MAT_INITIAL_MATRIX){
4867     /* put together the new matrix */
4868     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4869 
4870     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4871     /* Since these are PETSc arrays, change flags to free them as necessary. */
4872     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4873     b_oth->free_a  = PETSC_TRUE;
4874     b_oth->free_ij = PETSC_TRUE;
4875     b_oth->nonew   = 0;
4876 
4877     ierr = PetscFree(bufj);CHKERRQ(ierr);
4878     if (!startsj || !bufa_ptr){
4879       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4880       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4881     } else {
4882       *startsj  = sstartsj;
4883       *bufa_ptr = bufa;
4884     }
4885   }
4886   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4887   PetscFunctionReturn(0);
4888 }
4889 
4890 #undef __FUNCT__
4891 #define __FUNCT__ "MatGetCommunicationStructs"
4892 /*@C
4893   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4894 
4895   Not Collective
4896 
4897   Input Parameters:
4898 . A - The matrix in mpiaij format
4899 
4900   Output Parameter:
4901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4902 . colmap - A map from global column index to local index into lvec
4903 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4904 
4905   Level: developer
4906 
4907 @*/
4908 #if defined (PETSC_USE_CTABLE)
4909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4910 #else
4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4912 #endif
4913 {
4914   Mat_MPIAIJ *a;
4915 
4916   PetscFunctionBegin;
4917   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4918   PetscValidPointer(lvec, 2)
4919   PetscValidPointer(colmap, 3)
4920   PetscValidPointer(multScatter, 4)
4921   a = (Mat_MPIAIJ *) A->data;
4922   if (lvec) *lvec = a->lvec;
4923   if (colmap) *colmap = a->colmap;
4924   if (multScatter) *multScatter = a->Mvctx;
4925   PetscFunctionReturn(0);
4926 }
4927 
4928 EXTERN_C_BEGIN
4929 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4930 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4931 EXTERN_C_END
4932 
4933 #include "../src/mat/impls/dense/mpi/mpidense.h"
4934 
4935 #undef __FUNCT__
4936 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4937 /*
4938     Computes (B'*A')' since computing B*A directly is untenable
4939 
4940                n                       p                          p
4941         (              )       (              )         (                  )
4942       m (      A       )  *  n (       B      )   =   m (         C        )
4943         (              )       (              )         (                  )
4944 
4945 */
4946 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4947 {
4948   PetscErrorCode     ierr;
4949   Mat                At,Bt,Ct;
4950 
4951   PetscFunctionBegin;
4952   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4953   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4954   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4955   ierr = MatDestroy(At);CHKERRQ(ierr);
4956   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4957   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4958   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4959   PetscFunctionReturn(0);
4960 }
4961 
4962 #undef __FUNCT__
4963 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4964 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4965 {
4966   PetscErrorCode ierr;
4967   PetscInt       m=A->rmap->n,n=B->cmap->n;
4968   Mat            Cmat;
4969 
4970   PetscFunctionBegin;
4971   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4972   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4973   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4974   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4975   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4976   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4977   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4978   *C   = Cmat;
4979   PetscFunctionReturn(0);
4980 }
4981 
4982 /* ----------------------------------------------------------------*/
4983 #undef __FUNCT__
4984 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4985 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4986 {
4987   PetscErrorCode ierr;
4988 
4989   PetscFunctionBegin;
4990   if (scall == MAT_INITIAL_MATRIX){
4991     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4992   }
4993   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4994   PetscFunctionReturn(0);
4995 }
4996 
4997 EXTERN_C_BEGIN
4998 #if defined(PETSC_HAVE_MUMPS)
4999 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
5000 #endif
5001 #if defined(PETSC_HAVE_PASTIX)
5002 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5003 #endif
5004 #if defined(PETSC_HAVE_SUPERLU_DIST)
5005 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5006 #endif
5007 #if defined(PETSC_HAVE_SPOOLES)
5008 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5009 #endif
5010 EXTERN_C_END
5011 
5012 /*MC
5013    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5014 
5015    Options Database Keys:
5016 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5017 
5018   Level: beginner
5019 
5020 .seealso: MatCreateMPIAIJ()
5021 M*/
5022 
5023 EXTERN_C_BEGIN
5024 #undef __FUNCT__
5025 #define __FUNCT__ "MatCreate_MPIAIJ"
5026 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5027 {
5028   Mat_MPIAIJ     *b;
5029   PetscErrorCode ierr;
5030   PetscMPIInt    size;
5031 
5032   PetscFunctionBegin;
5033   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5034 
5035   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5036   B->data         = (void*)b;
5037   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5038   B->rmap->bs      = 1;
5039   B->assembled    = PETSC_FALSE;
5040   B->mapping      = 0;
5041 
5042   B->insertmode      = NOT_SET_VALUES;
5043   b->size            = size;
5044   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5045 
5046   /* build cache for off array entries formed */
5047   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5048   b->donotstash  = PETSC_FALSE;
5049   b->colmap      = 0;
5050   b->garray      = 0;
5051   b->roworiented = PETSC_TRUE;
5052 
5053   /* stuff used for matrix vector multiply */
5054   b->lvec      = PETSC_NULL;
5055   b->Mvctx     = PETSC_NULL;
5056 
5057   /* stuff for MatGetRow() */
5058   b->rowindices   = 0;
5059   b->rowvalues    = 0;
5060   b->getrowactive = PETSC_FALSE;
5061 
5062 #if defined(PETSC_HAVE_SPOOLES)
5063   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
5064                                      "MatGetFactor_mpiaij_spooles",
5065                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5066 #endif
5067 #if defined(PETSC_HAVE_MUMPS)
5068   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
5069                                      "MatGetFactor_mpiaij_mumps",
5070                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5071 #endif
5072 #if defined(PETSC_HAVE_PASTIX)
5073   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
5074 					   "MatGetFactor_mpiaij_pastix",
5075 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5076 #endif
5077 #if defined(PETSC_HAVE_SUPERLU_DIST)
5078   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
5079                                      "MatGetFactor_mpiaij_superlu_dist",
5080                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5081 #endif
5082   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5083                                      "MatStoreValues_MPIAIJ",
5084                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5085   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5086                                      "MatRetrieveValues_MPIAIJ",
5087                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5088   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5089 				     "MatGetDiagonalBlock_MPIAIJ",
5090                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5091   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5092 				     "MatIsTranspose_MPIAIJ",
5093 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5094   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5095 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5096 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5097   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5098 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5099 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5100   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5101 				     "MatDiagonalScaleLocal_MPIAIJ",
5102 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5103   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5104                                      "MatConvert_MPIAIJ_MPICSRPERM",
5105                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5106   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5107                                      "MatConvert_MPIAIJ_MPICRL",
5108                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5110                                      "MatMatMult_MPIDense_MPIAIJ",
5111                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5112   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5113                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5114                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5115   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5116                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5117                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5118   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5119   PetscFunctionReturn(0);
5120 }
5121 EXTERN_C_END
5122 
5123 #undef __FUNCT__
5124 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5125 /*@
5126      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5127          and "off-diagonal" part of the matrix in CSR format.
5128 
5129    Collective on MPI_Comm
5130 
5131    Input Parameters:
5132 +  comm - MPI communicator
5133 .  m - number of local rows (Cannot be PETSC_DECIDE)
5134 .  n - This value should be the same as the local size used in creating the
5135        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5136        calculated if N is given) For square matrices n is almost always m.
5137 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5138 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5139 .   i - row indices for "diagonal" portion of matrix
5140 .   j - column indices
5141 .   a - matrix values
5142 .   oi - row indices for "off-diagonal" portion of matrix
5143 .   oj - column indices
5144 -   oa - matrix values
5145 
5146    Output Parameter:
5147 .   mat - the matrix
5148 
5149    Level: advanced
5150 
5151    Notes:
5152        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5153 
5154        The i and j indices are 0 based
5155 
5156        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5157 
5158 
5159 .keywords: matrix, aij, compressed row, sparse, parallel
5160 
5161 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5162           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5163 @*/
5164 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5165 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5166 {
5167   PetscErrorCode ierr;
5168   Mat_MPIAIJ     *maij;
5169 
5170  PetscFunctionBegin;
5171   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5172   if (i[0]) {
5173     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5174   }
5175   if (oi[0]) {
5176     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5177   }
5178   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5179   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5180   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5181   maij = (Mat_MPIAIJ*) (*mat)->data;
5182   maij->donotstash     = PETSC_TRUE;
5183   (*mat)->preallocated = PETSC_TRUE;
5184 
5185   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5186   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5187   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5188   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5189 
5190   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5191   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5192 
5193   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5194   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5195   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5196   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5197 
5198   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5199   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5200   PetscFunctionReturn(0);
5201 }
5202 
5203 /*
5204     Special version for direct calls from Fortran
5205 */
5206 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5207 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5208 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5209 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5210 #endif
5211 
5212 /* Change these macros so can be used in void function */
5213 #undef CHKERRQ
5214 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5215 #undef SETERRQ2
5216 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5217 #undef SETERRQ
5218 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5219 
5220 EXTERN_C_BEGIN
5221 #undef __FUNCT__
5222 #define __FUNCT__ "matsetvaluesmpiaij_"
5223 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5224 {
5225   Mat             mat = *mmat;
5226   PetscInt        m = *mm, n = *mn;
5227   InsertMode      addv = *maddv;
5228   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5229   PetscScalar     value;
5230   PetscErrorCode  ierr;
5231 
5232   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5233   if (mat->insertmode == NOT_SET_VALUES) {
5234     mat->insertmode = addv;
5235   }
5236 #if defined(PETSC_USE_DEBUG)
5237   else if (mat->insertmode != addv) {
5238     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5239   }
5240 #endif
5241   {
5242   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5243   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5244   PetscTruth      roworiented = aij->roworiented;
5245 
5246   /* Some Variables required in the macro */
5247   Mat             A = aij->A;
5248   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5249   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5250   MatScalar       *aa = a->a;
5251   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5252   Mat             B = aij->B;
5253   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5254   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5255   MatScalar       *ba = b->a;
5256 
5257   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5258   PetscInt        nonew = a->nonew;
5259   MatScalar       *ap1,*ap2;
5260 
5261   PetscFunctionBegin;
5262   for (i=0; i<m; i++) {
5263     if (im[i] < 0) continue;
5264 #if defined(PETSC_USE_DEBUG)
5265     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5266 #endif
5267     if (im[i] >= rstart && im[i] < rend) {
5268       row      = im[i] - rstart;
5269       lastcol1 = -1;
5270       rp1      = aj + ai[row];
5271       ap1      = aa + ai[row];
5272       rmax1    = aimax[row];
5273       nrow1    = ailen[row];
5274       low1     = 0;
5275       high1    = nrow1;
5276       lastcol2 = -1;
5277       rp2      = bj + bi[row];
5278       ap2      = ba + bi[row];
5279       rmax2    = bimax[row];
5280       nrow2    = bilen[row];
5281       low2     = 0;
5282       high2    = nrow2;
5283 
5284       for (j=0; j<n; j++) {
5285         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5286         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5287         if (in[j] >= cstart && in[j] < cend){
5288           col = in[j] - cstart;
5289           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5290         } else if (in[j] < 0) continue;
5291 #if defined(PETSC_USE_DEBUG)
5292         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5293 #endif
5294         else {
5295           if (mat->was_assembled) {
5296             if (!aij->colmap) {
5297               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5298             }
5299 #if defined (PETSC_USE_CTABLE)
5300             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5301 	    col--;
5302 #else
5303             col = aij->colmap[in[j]] - 1;
5304 #endif
5305             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5306               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5307               col =  in[j];
5308               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5309               B = aij->B;
5310               b = (Mat_SeqAIJ*)B->data;
5311               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5312               rp2      = bj + bi[row];
5313               ap2      = ba + bi[row];
5314               rmax2    = bimax[row];
5315               nrow2    = bilen[row];
5316               low2     = 0;
5317               high2    = nrow2;
5318               bm       = aij->B->rmap->n;
5319               ba = b->a;
5320             }
5321           } else col = in[j];
5322           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5323         }
5324       }
5325     } else {
5326       if (!aij->donotstash) {
5327         if (roworiented) {
5328           if (ignorezeroentries && v[i*n] == 0.0) continue;
5329           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5330         } else {
5331           if (ignorezeroentries && v[i] == 0.0) continue;
5332           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5333         }
5334       }
5335     }
5336   }}
5337   PetscFunctionReturnVoid();
5338 }
5339 EXTERN_C_END
5340 
5341