xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d4bf62d159b4eec940d9872c577cf78ea329f539)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Collective on comm.  Process 0 supplies the global sequential matrix gmat
    (gmat is not referenced on the other ranks); each rank receives its m local
    rows in *inmat.  With reuse == MAT_INITIAL_MATRIX a new MPIAIJ matrix is
    created; otherwise only the numerical values are moved into the existing
    *inmat, whose nonzero structure must match the earlier creation call.
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    /* only process 0 looks at gmat, so only it can check the type */
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    /* gather every rank's local row count, then prefix-sum into ownership ranges */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          /* ld[i] = number of entries of row i strictly left of the diagonal block */
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* process 0 reads its own rows directly from gmat; no copy is made */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens currently holds total row lengths, convert to diagonal-only */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to total row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      /* only non-root ranks allocated receive buffers above */
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    /* stash the left-of-diagonal counts so MAT_REUSE_MATRIX calls can split values */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat   = *inmat;
    ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       each row arrives ordered [left-of-diagonal | diagonal block | right-of-diagonal] */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* copy right-of-diagonal tail of row i-1 together with left-of-diagonal head of row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* right-of-diagonal tail of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      /* gmataa was advanced while copying; free via the saved base pointer */
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
174 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each processor
has an order-N integer array) but is fast to access.
*/
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
204 
205 
206 #define CHUNKSIZE   15
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
/*
   Inserts or adds a single (row,col,value) into the "off-diagonal" SeqAIJ
   block B.  Same algorithm as MatSetValues_SeqAIJ_A_Private() -- binary
   search narrowing, linear scan, then insert-with-shift (reallocating via
   MatSeqXAIJReallocateAIJ) when the column is new.
   NOTE: this is a macro, not a function; it relies on the caller's local
   variables (rp2, ap2, low2, high2, nrow2, rmax2, lastcol2, bilen, nonew,
   ignorezeroentries, b, B, bm, ba, bi, bj, bimax, _i, ii, t, N) being set up
   for the current row -- see MatSetValues_MPIAIJ().
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value;     \
        else                    ap2[_i] = value;      \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
273 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Replaces the numerical values of one locally owned row, where v[] contains
   all of the row's values in global-column order: first the off-diagonal
   entries left of the diagonal block, then the diagonal-block entries, then
   the off-diagonal entries to the right.
   NOTE(review): v is assumed to supply exactly one value per existing nonzero
   of the row (no new locations are created) -- confirm against callers.

   row - a GLOBAL row index (converted to local below)
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;   /* convert to local row index */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    /* garray maps B's compressed column indices back to global column numbers */
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
301 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts/adds an m-by-n logically dense block of values into the matrix.
   Locally owned rows go directly into the diagonal (A) or off-diagonal (B)
   sequential blocks via the MatSetValues_SeqAIJ_{A,B}_Private() macros;
   rows owned by other processes are stashed and communicated during
   MatAssemblyBegin/End.  Negative row or column indices are silently skipped.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is owned locally: set up the per-row search state used by the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        /* a null v means all values are zero */
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          /* column falls in the diagonal block A */
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            /* after assembly B uses compressed column numbering; translate via colmap */
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* column not in B's current pattern: expand B back to global numbering */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* off-process row: stash the values for communication at assembly time */
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
406 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m-by-n block of values into v (row-major: v[i*n+j]).
   Only rows owned by this process may be requested (an error is raised
   otherwise); columns may be anywhere.  Requested entries that are not in
   the stored nonzero pattern of the off-diagonal block are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column lies in the diagonal block A */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block B: translate global column to B's compressed numbering */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column absent from B's pattern -> value is an (unstored) zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
450 
451 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
455 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
456 {
457   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
458   PetscErrorCode ierr;
459   PetscInt       nstash,reallocs;
460   InsertMode     addv;
461 
462   PetscFunctionBegin;
463   if (aij->donotstash) {
464     PetscFunctionReturn(0);
465   }
466 
467   /* make sure all processors are either in INSERTMODE or ADDMODE */
468   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
469   if (addv == (ADD_VALUES|INSERT_VALUES)) {
470     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
471   }
472   mat->insertmode = addv; /* in case this processor had no cache */
473 
474   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
475   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
476   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
477   PetscFunctionReturn(0);
478 }
479 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes parallel assembly: drains the stash (inserting received
   off-process values locally), assembles the diagonal block A, handles
   collective disassembly of the off-diagonal block B if any rank required
   it, sets up the multiply scatter on first final assembly, and assembles B.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use     = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: result is false iff any rank disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the scatter used by MatMult() et al. */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row buffer (from MatGetRow) is invalid after the values change */
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  /* cached diagonal is invalid after the values change */
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}
548 
549 #undef __FUNCT__
550 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
551 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
552 {
553   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
554   PetscErrorCode ierr;
555 
556   PetscFunctionBegin;
557   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
558   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
559   PetscFunctionReturn(0);
560 }
561 
562 #undef __FUNCT__
563 #define __FUNCT__ "MatZeroRows_MPIAIJ"
564 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
565 {
566   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
567   PetscErrorCode ierr;
568   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
569   PetscInt       i,*owners = A->rmap->range;
570   PetscInt       *nprocs,j,idx,nsends,row;
571   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
572   PetscInt       *rvalues,count,base,slen,*source;
573   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
574   MPI_Comm       comm = ((PetscObject)A)->comm;
575   MPI_Request    *send_waits,*recv_waits;
576   MPI_Status     recv_status,*send_status;
577 #if defined(PETSC_DEBUG)
578   PetscTruth     found = PETSC_FALSE;
579 #endif
580 
581   PetscFunctionBegin;
582   /*  first count number of contributors to each processor */
583   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
584   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
585   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
586   j = 0;
587   for (i=0; i<N; i++) {
588     if (lastidx > (idx = rows[i])) j = 0;
589     lastidx = idx;
590     for (; j<size; j++) {
591       if (idx >= owners[j] && idx < owners[j+1]) {
592         nprocs[2*j]++;
593         nprocs[2*j+1] = 1;
594         owner[i] = j;
595 #if defined(PETSC_DEBUG)
596         found = PETSC_TRUE;
597 #endif
598         break;
599       }
600     }
601 #if defined(PETSC_DEBUG)
602     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
603     found = PETSC_FALSE;
604 #endif
605   }
606   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
607 
608   /* inform other processors of number of messages and max length*/
609   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
610 
611   /* post receives:   */
612   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
613   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
614   for (i=0; i<nrecvs; i++) {
615     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
616   }
617 
618   /* do sends:
619       1) starts[i] gives the starting index in svalues for stuff going to
620          the ith processor
621   */
622   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
623   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
624   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
625   starts[0] = 0;
626   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
627   for (i=0; i<N; i++) {
628     svalues[starts[owner[i]]++] = rows[i];
629   }
630 
631   starts[0] = 0;
632   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
633   count = 0;
634   for (i=0; i<size; i++) {
635     if (nprocs[2*i+1]) {
636       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
637     }
638   }
639   ierr = PetscFree(starts);CHKERRQ(ierr);
640 
641   base = owners[rank];
642 
643   /*  wait on receives */
644   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
645   source = lens + nrecvs;
646   count  = nrecvs; slen = 0;
647   while (count) {
648     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
649     /* unpack receives into our local space */
650     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
651     source[imdex]  = recv_status.MPI_SOURCE;
652     lens[imdex]    = n;
653     slen          += n;
654     count--;
655   }
656   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
657 
658   /* move the data into the send scatter */
659   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
660   count = 0;
661   for (i=0; i<nrecvs; i++) {
662     values = rvalues + i*nmax;
663     for (j=0; j<lens[i]; j++) {
664       lrows[count++] = values[j] - base;
665     }
666   }
667   ierr = PetscFree(rvalues);CHKERRQ(ierr);
668   ierr = PetscFree(lens);CHKERRQ(ierr);
669   ierr = PetscFree(owner);CHKERRQ(ierr);
670   ierr = PetscFree(nprocs);CHKERRQ(ierr);
671 
672   /* actually zap the local rows */
673   /*
674         Zero the required rows. If the "diagonal block" of the matrix
675      is square and the user wishes to set the diagonal we use separate
676      code so that MatSetValues() is not called for each diagonal allocating
677      new memory, thus calling lots of mallocs and slowing things down.
678 
679        Contributed by: Matthew Knepley
680   */
681   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
682   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
683   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
684     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
685   } else if (diag != 0.0) {
686     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
687     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
688       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
689 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
690     }
691     for (i = 0; i < slen; i++) {
692       row  = lrows[i] + rstart;
693       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
694     }
695     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
696     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
697   } else {
698     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
699   }
700   ierr = PetscFree(lrows);CHKERRQ(ierr);
701 
702   /* wait on sends */
703   if (nsends) {
704     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
705     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
706     ierr = PetscFree(send_status);CHKERRQ(ierr);
707   }
708   ierr = PetscFree(send_waits);CHKERRQ(ierr);
709   ierr = PetscFree(svalues);CHKERRQ(ierr);
710 
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatMult_MPIAIJ"
716 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
717 {
718   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
719   PetscErrorCode ierr;
720   PetscInt       nt;
721 
722   PetscFunctionBegin;
723   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
724   if (nt != A->cmap->n) {
725     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
726   }
727   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
728   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
729   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
730   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
736 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
737 {
738   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
739   PetscErrorCode ierr;
740 
741   PetscFunctionBegin;
742   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultAdd_MPIAIJ"
748 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
755   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
756   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
757   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
758   PetscFunctionReturn(0);
759 }
760 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   y = A^T * x.  The off-diagonal contribution is computed into the local
   work vector lvec and scattered (reversed, with addition) into yy.  The
   order of the scatter calls relative to the local multiply depends on
   whether the scatter context merges its begin/end phases.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* merged scatter: yy must hold the local result BEFORE the Begin() call */
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
791 
792 EXTERN_C_BEGIN
793 #undef __FUNCT__
794 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
795 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
796 {
797   MPI_Comm       comm;
798   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
799   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
800   IS             Me,Notme;
801   PetscErrorCode ierr;
802   PetscInt       M,N,first,last,*notme,i;
803   PetscMPIInt    size;
804 
805   PetscFunctionBegin;
806 
807   /* Easy test: symmetric diagonal block */
808   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
809   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
810   if (!*f) PetscFunctionReturn(0);
811   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
812   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
813   if (size == 1) PetscFunctionReturn(0);
814 
815   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
816   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
817   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
818   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
819   for (i=0; i<first; i++) notme[i] = i;
820   for (i=last; i<M; i++) notme[i-last+first] = i;
821   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
822   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
823   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
824   Aoff = Aoffs[0];
825   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
826   Boff = Boffs[0];
827   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
828   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
829   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
830   ierr = ISDestroy(Me);CHKERRQ(ierr);
831   ierr = ISDestroy(Notme);CHKERRQ(ierr);
832 
833   PetscFunctionReturn(0);
834 }
835 EXTERN_C_END
836 
837 #undef __FUNCT__
838 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
839 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
840 {
841   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
842   PetscErrorCode ierr;
843 
844   PetscFunctionBegin;
845   /* do nondiagonal part */
846   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
847   /* send it on its way */
848   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
849   /* do local part */
850   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
851   /* receive remote parts */
852   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
853   PetscFunctionReturn(0);
854 }
855 
856 /*
857   This only works correctly for square matrices where the subblock A->A is the
858    diagonal block
859 */
860 #undef __FUNCT__
861 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
862 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
863 {
864   PetscErrorCode ierr;
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866 
867   PetscFunctionBegin;
868   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
869   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
870     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
871   }
872   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
873   PetscFunctionReturn(0);
874 }
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "MatScale_MPIAIJ"
878 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
879 {
880   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
881   PetscErrorCode ierr;
882 
883   PetscFunctionBegin;
884   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
885   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
886   PetscFunctionReturn(0);
887 }
888 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
    Destroys an MPIAIJ matrix: frees the two sequential blocks, the ghost
    communication machinery and the cached work arrays, then removes the
    type-specific function pointers composed on the object.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* cached diagonal, created lazily by the Eisenstat path of MatRelax_MPIAIJ() */
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  /* diagonal (A) and off-diagonal (B) sequential blocks */
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
  /* global-to-local column map: hash table or plain array depending on build */
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  /* ghost vector and the scatter that fills it */
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  /* scratch space allocated on demand by MatGetRow_MPIAIJ() */
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  /* clear the type name and unregister the composed methods */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
926 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
/*
    Writes the matrix to a binary viewer in the standard PETSc AIJ file
    format: header (cookie, M, N, total nz), then all row lengths, then all
    column indices, then all values, each in global row order.

    Protocol: rank 0 owns the file descriptor and writes everything; every
    other rank sends its data to rank 0 with plain MPI_Send.  Each of the
    three phases (row lengths, column indices, values) therefore has a
    rank-0 branch that receives and writes, and a non-rank-0 branch that
    sends.  Within each local row the columns are emitted in globally
    ascending order by interleaving: off-diagonal entries left of the
    diagonal block, then the diagonal block, then the remaining
    off-diagonal entries.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;   /* this process's nonzero count */
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    /* global nonzero count goes directly into the header on rank 0 */
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    /* non-root ranks only contribute to the reduction */
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts (diagonal + off-diagonal entries per row) */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file; rank 0 writes its own, then receives
     and writes each other rank's in turn (reusing the same buffer) */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* rank 0 needs as much buffer space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    /* off-diagonal columns to the left of the diagonal block (garray maps
       local B columns to ascending global columns) */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    /* diagonal-block columns, shifted to global numbering */
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    /* remaining off-diagonal columns to the right of the diagonal block */
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      /* NOTE(review): the check is on rnz but the message reports nz */
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values, interleaved identically to the indices */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      /* NOTE(review): the check is on rnz but the message reports nz */
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1052 
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1055 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1056 {
1057   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1058   PetscErrorCode    ierr;
1059   PetscMPIInt       rank = aij->rank,size = aij->size;
1060   PetscTruth        isdraw,iascii,isbinary;
1061   PetscViewer       sviewer;
1062   PetscViewerFormat format;
1063 
1064   PetscFunctionBegin;
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1066   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1067   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1068   if (iascii) {
1069     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1070     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1071       MatInfo    info;
1072       PetscTruth inodes;
1073 
1074       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1075       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1076       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1077       if (!inodes) {
1078         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1079 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1080       } else {
1081         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1082 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1083       }
1084       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1085       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1086       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1087       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1088       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1089       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1090       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1091       PetscFunctionReturn(0);
1092     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1093       PetscInt   inodecount,inodelimit,*inodes;
1094       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1095       if (inodes) {
1096         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1097       } else {
1098         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1099       }
1100       PetscFunctionReturn(0);
1101     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1102       PetscFunctionReturn(0);
1103     }
1104   } else if (isbinary) {
1105     if (size == 1) {
1106       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1107       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1108     } else {
1109       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1110     }
1111     PetscFunctionReturn(0);
1112   } else if (isdraw) {
1113     PetscDraw  draw;
1114     PetscTruth isnull;
1115     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1116     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1117   }
1118 
1119   if (size == 1) {
1120     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1121     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1122   } else {
1123     /* assemble the entire matrix onto first processor. */
1124     Mat         A;
1125     Mat_SeqAIJ  *Aloc;
1126     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1127     MatScalar   *a;
1128 
1129     if (mat->rmap->N > 1024) {
1130       PetscTruth flg = PETSC_FALSE;
1131 
1132       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1133       if (!flg) {
1134         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1135       }
1136     }
1137 
1138     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1139     if (!rank) {
1140       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1141     } else {
1142       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1143     }
1144     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1145     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1146     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1147     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1148 
1149     /* copy over the A part */
1150     Aloc = (Mat_SeqAIJ*)aij->A->data;
1151     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1152     row = mat->rmap->rstart;
1153     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1154     for (i=0; i<m; i++) {
1155       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1156       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1157     }
1158     aj = Aloc->j;
1159     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1160 
1161     /* copy over the B part */
1162     Aloc = (Mat_SeqAIJ*)aij->B->data;
1163     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1164     row  = mat->rmap->rstart;
1165     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1166     ct   = cols;
1167     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1168     for (i=0; i<m; i++) {
1169       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1170       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1171     }
1172     ierr = PetscFree(ct);CHKERRQ(ierr);
1173     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1174     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1175     /*
1176        Everyone has to call to draw the matrix since the graphics waits are
1177        synchronized across all processors that share the PetscDraw object
1178     */
1179     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1180     if (!rank) {
1181       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1182       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1183     }
1184     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1185     ierr = MatDestroy(A);CHKERRQ(ierr);
1186   }
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatView_MPIAIJ"
1192 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1193 {
1194   PetscErrorCode ierr;
1195   PetscTruth     iascii,isdraw,issocket,isbinary;
1196 
1197   PetscFunctionBegin;
1198   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1199   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1200   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1201   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1202   if (iascii || isdraw || isbinary || issocket) {
1203     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1204   } else {
1205     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1206   }
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatRelax_MPIAIJ"
1212 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1213 {
1214   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1215   PetscErrorCode ierr;
1216   Vec            bb1 = 0;
1217   PetscTruth     hasop;
1218 
1219   PetscFunctionBegin;
1220   if (its > 1) {ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);}
1221 
1222   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1223     if (flag & SOR_ZERO_INITIAL_GUESS) {
1224       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1225       its--;
1226     }
1227 
1228     while (its--) {
1229       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1231 
1232       /* update rhs: bb1 = bb - B*x */
1233       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1234       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1235 
1236       /* local sweep */
1237       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1238     }
1239   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1240     if (flag & SOR_ZERO_INITIAL_GUESS) {
1241       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1242       its--;
1243     }
1244     while (its--) {
1245       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1247 
1248       /* update rhs: bb1 = bb - B*x */
1249       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1250       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1251 
1252       /* local sweep */
1253       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1254     }
1255   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1256     if (flag & SOR_ZERO_INITIAL_GUESS) {
1257       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1258       its--;
1259     }
1260     while (its--) {
1261       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1262       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1263 
1264       /* update rhs: bb1 = bb - B*x */
1265       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1266       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1267 
1268       /* local sweep */
1269       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1270     }
1271   }  else if (flag & SOR_EISENSTAT) {
1272     Vec         xx1;
1273 
1274     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1275     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1276     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1277 
1278     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1279     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1280     if (!mat->diag) {
1281       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1282       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1283     }
1284     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1285     if (hasop) {
1286       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1287     } else {
1288       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1289     }
1290     ierr = VecWAXPY(bb1,-1.0,bb1,bb);CHKERRQ(ierr);
1291     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1292 
1293     /* local sweep */
1294     ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1295     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1296     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1297   } else {
1298     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1299   }
1300 
1301   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatPermute_MPIAIJ"
1307 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1308 {
1309   MPI_Comm       comm,pcomm;
1310   PetscInt       first,local_size,nrows;
1311   const PetscInt *rows;
1312   PetscMPIInt    size;
1313   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1318   /* make a collective version of 'rowp' */
1319   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1320   if (pcomm==comm) {
1321     crowp = rowp;
1322   } else {
1323     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1324     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1325     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1326     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1327   }
1328   /* collect the global row permutation and invert it */
1329   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1330   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1331   if (pcomm!=comm) {
1332     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1333   }
1334   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1335   /* get the local target indices */
1336   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1337   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1338   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1339   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1340   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1341   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1342   /* the column permutation is so much easier;
1343      make a local version of 'colp' and invert it */
1344   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1345   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1346   if (size==1) {
1347     lcolp = colp;
1348   } else {
1349     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1350     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1351     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1352   }
1353   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1354   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1355   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1356   if (size>1) {
1357     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1358     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1359   }
1360   /* now we just get the submatrix */
1361   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1362   /* clean up */
1363   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1364   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1370 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1371 {
1372   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1373   Mat            A = mat->A,B = mat->B;
1374   PetscErrorCode ierr;
1375   PetscReal      isend[5],irecv[5];
1376 
1377   PetscFunctionBegin;
1378   info->block_size     = 1.0;
1379   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1380   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1381   isend[3] = info->memory;  isend[4] = info->mallocs;
1382   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1383   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1384   isend[3] += info->memory;  isend[4] += info->mallocs;
1385   if (flag == MAT_LOCAL) {
1386     info->nz_used      = isend[0];
1387     info->nz_allocated = isend[1];
1388     info->nz_unneeded  = isend[2];
1389     info->memory       = isend[3];
1390     info->mallocs      = isend[4];
1391   } else if (flag == MAT_GLOBAL_MAX) {
1392     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1393     info->nz_used      = irecv[0];
1394     info->nz_allocated = irecv[1];
1395     info->nz_unneeded  = irecv[2];
1396     info->memory       = irecv[3];
1397     info->mallocs      = irecv[4];
1398   } else if (flag == MAT_GLOBAL_SUM) {
1399     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1400     info->nz_used      = irecv[0];
1401     info->nz_allocated = irecv[1];
1402     info->nz_unneeded  = irecv[2];
1403     info->memory       = irecv[3];
1404     info->mallocs      = irecv[4];
1405   }
1406   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1407   info->fill_ratio_needed = 0;
1408   info->factor_mallocs    = 0;
1409 
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatSetOption_MPIAIJ"
1415 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1416 {
1417   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   switch (op) {
1422   case MAT_NEW_NONZERO_LOCATIONS:
1423   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1424   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1425   case MAT_KEEP_NONZERO_PATTERN:
1426   case MAT_NEW_NONZERO_LOCATION_ERR:
1427   case MAT_USE_INODES:
1428   case MAT_IGNORE_ZERO_ENTRIES:
1429     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1430     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1431     break;
1432   case MAT_ROW_ORIENTED:
1433     a->roworiented = flg;
1434     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1435     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1436     break;
1437   case MAT_NEW_DIAGONALS:
1438     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1439     break;
1440   case MAT_IGNORE_OFF_PROC_ENTRIES:
1441     a->donotstash = PETSC_TRUE;
1442     break;
1443   case MAT_SYMMETRIC:
1444     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1445     break;
1446   case MAT_STRUCTURALLY_SYMMETRIC:
1447   case MAT_HERMITIAN:
1448   case MAT_SYMMETRY_ETERNAL:
1449     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1450     break;
1451   default:
1452     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1453   }
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatGetRow_MPIAIJ"
1459 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1460 {
1461   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1462   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1463   PetscErrorCode ierr;
1464   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1465   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1466   PetscInt       *cmap,*idx_p;
1467 
1468   PetscFunctionBegin;
1469   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1470   mat->getrowactive = PETSC_TRUE;
1471 
1472   if (!mat->rowvalues && (idx || v)) {
1473     /*
1474         allocate enough space to hold information from the longest row.
1475     */
1476     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1477     PetscInt     max = 1,tmp;
1478     for (i=0; i<matin->rmap->n; i++) {
1479       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1480       if (max < tmp) { max = tmp; }
1481     }
1482     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1483     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1484   }
1485 
1486   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1487   lrow = row - rstart;
1488 
1489   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1490   if (!v)   {pvA = 0; pvB = 0;}
1491   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1492   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1493   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1494   nztot = nzA + nzB;
1495 
1496   cmap  = mat->garray;
1497   if (v  || idx) {
1498     if (nztot) {
1499       /* Sort by increasing column numbers, assuming A and B already sorted */
1500       PetscInt imark = -1;
1501       if (v) {
1502         *v = v_p = mat->rowvalues;
1503         for (i=0; i<nzB; i++) {
1504           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1505           else break;
1506         }
1507         imark = i;
1508         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1509         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1510       }
1511       if (idx) {
1512         *idx = idx_p = mat->rowindices;
1513         if (imark > -1) {
1514           for (i=0; i<imark; i++) {
1515             idx_p[i] = cmap[cworkB[i]];
1516           }
1517         } else {
1518           for (i=0; i<nzB; i++) {
1519             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1520             else break;
1521           }
1522           imark = i;
1523         }
1524         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1525         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1526       }
1527     } else {
1528       if (idx) *idx = 0;
1529       if (v)   *v   = 0;
1530     }
1531   }
1532   *nz = nztot;
1533   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1534   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1540 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1541 {
1542   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1543 
1544   PetscFunctionBegin;
1545   if (!aij->getrowactive) {
1546     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1547   }
1548   aij->getrowactive = PETSC_FALSE;
1549   PetscFunctionReturn(0);
1550 }
1551 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, one, or infinity norm of a
   parallel AIJ matrix by walking the raw nonzero arrays of the diagonal
   (aij->A) and off-diagonal (aij->B) sequential blocks and combining the
   per-process results with MPI_Allreduce.  The two norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: the whole matrix lives in the diagonal block */
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, then global sum and sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per global column (length cmap->N on every process,
         A-block columns shifted by cstart, B-block columns mapped via garray),
         globally sum the column sums, then take the maximum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so each local row sum is complete;
         the global maximum is formed with MPI_MAX */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1628 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix by
   inserting each local row of A as a column of the result with
   MatSetValues() (off-process entries are moved by the assembly).

   For MAT_INITIAL_MATRIX (and in-place reuse) a new matrix is created with a
   rough preallocation; note that o_nnz is approximated by d_nnz to avoid
   communication.  For in-place transpose (*matout == A, square only) the new
   matrix replaces A via MatHeaderCopy().

   NOTE(review): Aloc->j is temporarily shifted to global indices and restored
   afterwards, so this routine briefly mutates A's internal data.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    /* create the transposed matrix with swapped row/column layout */
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part: each local row i of A becomes column (rstart+i) of B */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part, mapping compressed local columns to global via garray */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place: B takes over A's header, destroying A's old data */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1702 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1703 {
1704   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1705   Mat            a = aij->A,b = aij->B;
1706   PetscErrorCode ierr;
1707   PetscInt       s1,s2,s3;
1708 
1709   PetscFunctionBegin;
1710   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1711   if (rr) {
1712     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1713     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1714     /* Overlap communication with computation. */
1715     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1716   }
1717   if (ll) {
1718     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1719     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1720     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1721   }
1722   /* scale  the diagonal block */
1723   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1724 
1725   if (rr) {
1726     /* Do a scatter end and then right scale the off-diagonal block */
1727     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1728     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1729   }
1730 
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1736 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1737 {
1738   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1743   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1744   PetscFunctionReturn(0);
1745 }
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1748 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1749 {
1750   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1751   PetscErrorCode ierr;
1752 
1753   PetscFunctionBegin;
1754   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatEqual_MPIAIJ"
1760 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1761 {
1762   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1763   Mat            a,b,c,d;
1764   PetscTruth     flg;
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
1768   a = matA->A; b = matA->B;
1769   c = matB->A; d = matB->B;
1770 
1771   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1772   if (flg) {
1773     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1774   }
1775   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1776   PetscFunctionReturn(0);
1777 }
1778 
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatCopy_MPIAIJ"
1781 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1782 {
1783   PetscErrorCode ierr;
1784   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1785   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1786 
1787   PetscFunctionBegin;
1788   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1789   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1790     /* because of the column compression in the off-processor part of the matrix a->B,
1791        the number of columns in a->B and b->B may be different, hence we cannot call
1792        the MatCopy() directly on the two parts. If need be, we can provide a more
1793        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1794        then copying the submatrices */
1795     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1796   } else {
1797     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1798     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1799   }
1800   PetscFunctionReturn(0);
1801 }
1802 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/*
   MatSetUpPreallocation_MPIAIJ - Fallback preallocation used when the caller
   never supplied nonzero counts; forwards to MatMPIAIJSetPreallocation() with
   PETSC_DEFAULT per-row estimates for both blocks.
*/
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1813 
1814 #include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y for parallel AIJ matrices.

   SAME_NONZERO_PATTERN:   the nonzero arrays of both blocks line up exactly,
                           so a single BLAS axpy per block suffices.
   SUBSET_NONZERO_PATTERN: the diagonal blocks use MatAXPY_SeqAIJ(); for the
                           off-diagonal blocks an index map x->nz -> positions
                           in y (y->xtoy) is built once and cached on Y,
                           together with a reference to X's off-diagonal block
                           (y->XtoY) so the cache can be invalidated when a
                           different X is used.
   otherwise:              fall back to the generic MatAXPY_Basic().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical sparsity: y->a += a * x->a for each block via BLAS */
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* drop a cached map that was built against a different X */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      /* keep X's off-diagonal block alive while the cached map refers to it */
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1856 
1857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatConjugate_MPIAIJ"
1861 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1862 {
1863 #if defined(PETSC_USE_COMPLEX)
1864   PetscErrorCode ierr;
1865   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1866 
1867   PetscFunctionBegin;
1868   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1869   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1870 #else
1871   PetscFunctionBegin;
1872 #endif
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatRealPart_MPIAIJ"
1878 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1879 {
1880   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1881   PetscErrorCode ierr;
1882 
1883   PetscFunctionBegin;
1884   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1885   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 #undef __FUNCT__
1890 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1891 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1892 {
1893   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1894   PetscErrorCode ierr;
1895 
1896   PetscFunctionBegin;
1897   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1898   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 #ifdef PETSC_HAVE_PBGL
1903 
1904 #include <boost/parallel/mpi/bsp_process_group.hpp>
1905 #include <boost/graph/distributed/ilu_default_graph.hpp>
1906 #include <boost/graph/distributed/ilu_0_block.hpp>
1907 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1908 #include <boost/graph/distributed/petsc/interface.hpp>
1909 #include <boost/multi_array.hpp>
1910 #include <boost/parallel/distributed_property_map->hpp>
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1914 /*
1915   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1916 */
1917 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1918 {
1919   namespace petsc = boost::distributed::petsc;
1920 
1921   namespace graph_dist = boost::graph::distributed;
1922   using boost::graph::distributed::ilu_default::process_group_type;
1923   using boost::graph::ilu_permuted;
1924 
1925   PetscTruth      row_identity, col_identity;
1926   PetscContainer  c;
1927   PetscInt        m, n, M, N;
1928   PetscErrorCode  ierr;
1929 
1930   PetscFunctionBegin;
1931   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1932   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1933   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1934   if (!row_identity || !col_identity) {
1935     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1936   }
1937 
1938   process_group_type pg;
1939   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1940   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1941   lgraph_type&   level_graph = *lgraph_p;
1942   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1943 
1944   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1945   ilu_permuted(level_graph);
1946 
1947   /* put together the new matrix */
1948   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1949   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1950   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1951   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1952   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1953   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1954   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1955 
1956   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1957   ierr = PetscContainerSetPointer(c, lgraph_p);
1958   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
1959   PetscFunctionReturn(0);
1960 }
1961 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
/*
   Intentional no-op: with the PBGL-based parallel ILU the factorization work
   appears to be done entirely in MatILUFactorSymbolic_MPIAIJ() (ilu_permuted),
   leaving nothing for a separate numeric phase.  NOTE(review): confirm no
   numeric refactorization is needed when matrix values change.
*/
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
1969 
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatSolve_MPIAIJ"
1972 /*
1973   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1974 */
1975 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1976 {
1977   namespace graph_dist = boost::graph::distributed;
1978 
1979   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1980   lgraph_type*   lgraph_p;
1981   PetscContainer c;
1982   PetscErrorCode ierr;
1983 
1984   PetscFunctionBegin;
1985   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1986   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1987   ierr = VecCopy(b, x);CHKERRQ(ierr);
1988 
1989   PetscScalar* array_x;
1990   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1991   PetscInt sx;
1992   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1993 
1994   PetscScalar* array_b;
1995   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1996   PetscInt sb;
1997   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1998 
1999   lgraph_type&   level_graph = *lgraph_p;
2000   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2001 
2002   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2003   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2004                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2005 
2006   typedef boost::iterator_property_map<array_ref_type::iterator,
2007                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2008   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2009                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2010 
2011   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2012 
2013   PetscFunctionReturn(0);
2014 }
2015 #endif
2016 
typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;     /* local nonzero count; number of sends/receives in the replication pattern */
  PetscMPIInt    *send_rank;                /* destination ranks; receive ranks live in the same allocation (recv_rank = send_rank + size) */
  PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j; /* send buffer of nz counts (rbuf_nz = sbuf_nz + nsends); send/recv buffers of row-pointer + column data */
  PetscScalar    *sbuf_a,**rbuf_a;          /* send/recv buffers of matrix values */
  PetscErrorCode (*MatDestroy)(Mat);        /* original destroy routine, restored and invoked by MatDestroy_MatRedundant() */
} Mat_Redundant;
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2027 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2028 {
2029   PetscErrorCode       ierr;
2030   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2031   PetscInt             i;
2032 
2033   PetscFunctionBegin;
2034   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2035   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2036   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2037   for (i=0; i<redund->nrecvs; i++){
2038     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2039     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2040   }
2041   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2042   ierr = PetscFree(redund);CHKERRQ(ierr);
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatDestroy_MatRedundant"
2048 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2049 {
2050   PetscErrorCode  ierr;
2051   PetscContainer  container;
2052   Mat_Redundant   *redund=PETSC_NULL;
2053 
2054   PetscFunctionBegin;
2055   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2056   if (container) {
2057     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2058   } else {
2059     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2060   }
2061   A->ops->destroy = redund->MatDestroy;
2062   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2063   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2064   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2070 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2071 {
2072   PetscMPIInt    rank,size;
2073   MPI_Comm       comm=((PetscObject)mat)->comm;
2074   PetscErrorCode ierr;
2075   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2076   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2077   PetscInt       *rowrange=mat->rmap->range;
2078   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2079   Mat            A=aij->A,B=aij->B,C=*matredundant;
2080   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2081   PetscScalar    *sbuf_a;
2082   PetscInt       nzlocal=a->nz+b->nz;
2083   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2084   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2085   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2086   MatScalar      *aworkA,*aworkB;
2087   PetscScalar    *vals;
2088   PetscMPIInt    tag1,tag2,tag3,imdex;
2089   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2090                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2091   MPI_Status     recv_status,*send_status;
2092   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2093   PetscInt       **rbuf_j=PETSC_NULL;
2094   PetscScalar    **rbuf_a=PETSC_NULL;
2095   Mat_Redundant  *redund=PETSC_NULL;
2096   PetscContainer container;
2097 
2098   PetscFunctionBegin;
2099   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2100   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2101 
2102   if (reuse == MAT_REUSE_MATRIX) {
2103     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2104     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2105     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2106     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2107     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2108     if (container) {
2109       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2110     } else {
2111       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2112     }
2113     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2114 
2115     nsends    = redund->nsends;
2116     nrecvs    = redund->nrecvs;
2117     send_rank = redund->send_rank; recv_rank = send_rank + size;
2118     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2119     sbuf_j    = redund->sbuf_j;
2120     sbuf_a    = redund->sbuf_a;
2121     rbuf_j    = redund->rbuf_j;
2122     rbuf_a    = redund->rbuf_a;
2123   }
2124 
2125   if (reuse == MAT_INITIAL_MATRIX){
2126     PetscMPIInt  subrank,subsize;
2127     PetscInt     nleftover,np_subcomm;
2128     /* get the destination processors' id send_rank, nsends and nrecvs */
2129     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2130     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2131     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
2132     recv_rank = send_rank + size;
2133     np_subcomm = size/nsubcomm;
2134     nleftover  = size - nsubcomm*np_subcomm;
2135     nsends = 0; nrecvs = 0;
2136     for (i=0; i<size; i++){ /* i=rank*/
2137       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2138         send_rank[nsends] = i; nsends++;
2139         recv_rank[nrecvs++] = i;
2140       }
2141     }
2142     if (rank >= size - nleftover){/* this proc is a leftover processor */
2143       i = size-nleftover-1;
2144       j = 0;
2145       while (j < nsubcomm - nleftover){
2146         send_rank[nsends++] = i;
2147         i--; j++;
2148       }
2149     }
2150 
2151     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2152       for (i=0; i<nleftover; i++){
2153         recv_rank[nrecvs++] = size-nleftover+i;
2154       }
2155     }
2156 
2157     /* allocate sbuf_j, sbuf_a */
2158     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2159     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2160     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2161   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2162 
2163   /* copy mat's local entries into the buffers */
2164   if (reuse == MAT_INITIAL_MATRIX){
2165     rownz_max = 0;
2166     rptr = sbuf_j;
2167     cols = sbuf_j + rend-rstart + 1;
2168     vals = sbuf_a;
2169     rptr[0] = 0;
2170     for (i=0; i<rend-rstart; i++){
2171       row = i + rstart;
2172       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2173       ncols  = nzA + nzB;
2174       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2175       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2176       /* load the column indices for this row into cols */
2177       lwrite = 0;
2178       for (l=0; l<nzB; l++) {
2179         if ((ctmp = bmap[cworkB[l]]) < cstart){
2180           vals[lwrite]   = aworkB[l];
2181           cols[lwrite++] = ctmp;
2182         }
2183       }
2184       for (l=0; l<nzA; l++){
2185         vals[lwrite]   = aworkA[l];
2186         cols[lwrite++] = cstart + cworkA[l];
2187       }
2188       for (l=0; l<nzB; l++) {
2189         if ((ctmp = bmap[cworkB[l]]) >= cend){
2190           vals[lwrite]   = aworkB[l];
2191           cols[lwrite++] = ctmp;
2192         }
2193       }
2194       vals += ncols;
2195       cols += ncols;
2196       rptr[i+1] = rptr[i] + ncols;
2197       if (rownz_max < ncols) rownz_max = ncols;
2198     }
2199     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2200   } else { /* only copy matrix values into sbuf_a */
2201     rptr = sbuf_j;
2202     vals = sbuf_a;
2203     rptr[0] = 0;
2204     for (i=0; i<rend-rstart; i++){
2205       row = i + rstart;
2206       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2207       ncols  = nzA + nzB;
2208       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2209       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2210       lwrite = 0;
2211       for (l=0; l<nzB; l++) {
2212         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2213       }
2214       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2215       for (l=0; l<nzB; l++) {
2216         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2217       }
2218       vals += ncols;
2219       rptr[i+1] = rptr[i] + ncols;
2220     }
2221   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2222 
2223   /* send nzlocal to others, and recv other's nzlocal */
2224   /*--------------------------------------------------*/
2225   if (reuse == MAT_INITIAL_MATRIX){
2226     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2227     s_waits2 = s_waits3 + nsends;
2228     s_waits1 = s_waits2 + nsends;
2229     r_waits1 = s_waits1 + nsends;
2230     r_waits2 = r_waits1 + nrecvs;
2231     r_waits3 = r_waits2 + nrecvs;
2232   } else {
2233     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2234     r_waits3 = s_waits3 + nsends;
2235   }
2236 
2237   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2238   if (reuse == MAT_INITIAL_MATRIX){
2239     /* get new tags to keep the communication clean */
2240     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2241     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2242     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2243     rbuf_nz = sbuf_nz + nsends;
2244 
2245     /* post receives of other's nzlocal */
2246     for (i=0; i<nrecvs; i++){
2247       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2248     }
2249     /* send nzlocal to others */
2250     for (i=0; i<nsends; i++){
2251       sbuf_nz[i] = nzlocal;
2252       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2253     }
2254     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2255     count = nrecvs;
2256     while (count) {
2257       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2258       recv_rank[imdex] = recv_status.MPI_SOURCE;
2259       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2260       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2261 
2262       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2263       rbuf_nz[imdex] += i + 2;
2264       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2265       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2266       count--;
2267     }
2268     /* wait on sends of nzlocal */
2269     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2270     /* send mat->i,j to others, and recv from other's */
2271     /*------------------------------------------------*/
2272     for (i=0; i<nsends; i++){
2273       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2274       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2275     }
2276     /* wait on receives of mat->i,j */
2277     /*------------------------------*/
2278     count = nrecvs;
2279     while (count) {
2280       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2281       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2282       count--;
2283     }
2284     /* wait on sends of mat->i,j */
2285     /*---------------------------*/
2286     if (nsends) {
2287       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2288     }
2289   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2290 
2291   /* post receives, send and receive mat->a */
2292   /*----------------------------------------*/
2293   for (imdex=0; imdex<nrecvs; imdex++) {
2294     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2295   }
2296   for (i=0; i<nsends; i++){
2297     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2298   }
2299   count = nrecvs;
2300   while (count) {
2301     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2302     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2303     count--;
2304   }
2305   if (nsends) {
2306     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2307   }
2308 
2309   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2310 
2311   /* create redundant matrix */
2312   /*-------------------------*/
2313   if (reuse == MAT_INITIAL_MATRIX){
2314     /* compute rownz_max for preallocation */
2315     for (imdex=0; imdex<nrecvs; imdex++){
2316       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2317       rptr = rbuf_j[imdex];
2318       for (i=0; i<j; i++){
2319         ncols = rptr[i+1] - rptr[i];
2320         if (rownz_max < ncols) rownz_max = ncols;
2321       }
2322     }
2323 
2324     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2325     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2326     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2327     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2328     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2329   } else {
2330     C = *matredundant;
2331   }
2332 
2333   /* insert local matrix entries */
2334   rptr = sbuf_j;
2335   cols = sbuf_j + rend-rstart + 1;
2336   vals = sbuf_a;
2337   for (i=0; i<rend-rstart; i++){
2338     row   = i + rstart;
2339     ncols = rptr[i+1] - rptr[i];
2340     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2341     vals += ncols;
2342     cols += ncols;
2343   }
2344   /* insert received matrix entries */
2345   for (imdex=0; imdex<nrecvs; imdex++){
2346     rstart = rowrange[recv_rank[imdex]];
2347     rend   = rowrange[recv_rank[imdex]+1];
2348     rptr = rbuf_j[imdex];
2349     cols = rbuf_j[imdex] + rend-rstart + 1;
2350     vals = rbuf_a[imdex];
2351     for (i=0; i<rend-rstart; i++){
2352       row   = i + rstart;
2353       ncols = rptr[i+1] - rptr[i];
2354       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2355       vals += ncols;
2356       cols += ncols;
2357     }
2358   }
2359   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2361   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2362   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2363   if (reuse == MAT_INITIAL_MATRIX){
2364     PetscContainer container;
2365     *matredundant = C;
2366     /* create a supporting struct and attach it to C for reuse */
2367     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2368     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2369     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2370     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2371     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2372 
2373     redund->nzlocal = nzlocal;
2374     redund->nsends  = nsends;
2375     redund->nrecvs  = nrecvs;
2376     redund->send_rank = send_rank;
2377     redund->sbuf_nz = sbuf_nz;
2378     redund->sbuf_j  = sbuf_j;
2379     redund->sbuf_a  = sbuf_a;
2380     redund->rbuf_j  = rbuf_j;
2381     redund->rbuf_a  = rbuf_a;
2382 
2383     redund->MatDestroy = C->ops->destroy;
2384     C->ops->destroy    = MatDestroy_MatRedundant;
2385   }
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2391 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2392 {
2393   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2394   PetscErrorCode ierr;
2395   PetscInt       i,*idxb = 0;
2396   PetscScalar    *va,*vb;
2397   Vec            vtmp;
2398 
2399   PetscFunctionBegin;
2400   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2401   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2402   if (idx) {
2403     for (i=0; i<A->rmap->n; i++) {
2404       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2405     }
2406   }
2407 
2408   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2409   if (idx) {
2410     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2411   }
2412   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2413   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2414 
2415   for (i=0; i<A->rmap->n; i++){
2416     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2417       va[i] = vb[i];
2418       if (idx) idx[i] = a->garray[idxb[i]];
2419     }
2420   }
2421 
2422   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2423   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2424   if (idxb) {
2425     ierr = PetscFree(idxb);CHKERRQ(ierr);
2426   }
2427   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 #undef __FUNCT__
2432 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2433 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2434 {
2435   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2436   PetscErrorCode ierr;
2437   PetscInt       i,*idxb = 0;
2438   PetscScalar    *va,*vb;
2439   Vec            vtmp;
2440 
2441   PetscFunctionBegin;
2442   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2443   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2444   if (idx) {
2445     for (i=0; i<A->cmap->n; i++) {
2446       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2447     }
2448   }
2449 
2450   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2451   if (idx) {
2452     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2453   }
2454   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2455   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2456 
2457   for (i=0; i<A->rmap->n; i++){
2458     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2459       va[i] = vb[i];
2460       if (idx) idx[i] = a->garray[idxb[i]];
2461     }
2462   }
2463 
2464   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2465   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2466   if (idxb) {
2467     ierr = PetscFree(idxb);CHKERRQ(ierr);
2468   }
2469   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2475 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2476 {
2477   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2478   PetscInt       n      = A->rmap->n;
2479   PetscInt       cstart = A->cmap->rstart;
2480   PetscInt      *cmap   = mat->garray;
2481   PetscInt      *diagIdx, *offdiagIdx;
2482   Vec            diagV, offdiagV;
2483   PetscScalar   *a, *diagA, *offdiagA;
2484   PetscInt       r;
2485   PetscErrorCode ierr;
2486 
2487   PetscFunctionBegin;
2488   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2489   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2490   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2491   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2492   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2493   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2494   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2495   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2496   for(r = 0; r < n; ++r) {
2497     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2498       a[r]   = diagA[r];
2499       idx[r] = cstart + diagIdx[r];
2500     } else {
2501       a[r]   = offdiagA[r];
2502       idx[r] = cmap[offdiagIdx[r]];
2503     }
2504   }
2505   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2509   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2510   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 #undef __FUNCT__
2515 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2516 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2517 {
2518   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2519   PetscInt       n      = A->rmap->n;
2520   PetscInt       cstart = A->cmap->rstart;
2521   PetscInt      *cmap   = mat->garray;
2522   PetscInt      *diagIdx, *offdiagIdx;
2523   Vec            diagV, offdiagV;
2524   PetscScalar   *a, *diagA, *offdiagA;
2525   PetscInt       r;
2526   PetscErrorCode ierr;
2527 
2528   PetscFunctionBegin;
2529   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2530   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2531   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2532   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2533   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2534   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2535   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2536   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2537   for(r = 0; r < n; ++r) {
2538     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2539       a[r]   = diagA[r];
2540       idx[r] = cstart + diagIdx[r];
2541     } else {
2542       a[r]   = offdiagA[r];
2543       idx[r] = cmap[offdiagIdx[r]];
2544     }
2545   }
2546   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2547   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2548   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2549   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2550   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2551   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2552   PetscFunctionReturn(0);
2553 }
2554 
#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
/*
   Builds a new sequential matrix holding only the nonzero structure of mat
   (MAT_DO_NOT_GET_VALUES: numerical values are not copied), by delegating
   to MatGetSubMatrix_MPIAIJ_All().
*/
PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2564 }
2565 
2566 /* -------------------------------------------------------------------*/
/*
    Function (method) table for the MPIAIJ matrix type.  Entries are
  POSITIONAL: each slot corresponds to one Mat operation, as indicated by
  the numeric comments; a 0 means the operation is not implemented for this
  type.  Do not reorder entries.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*24*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*44*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*59*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*83*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*94*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*99*/ 0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*104*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*109*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*114*/MatGetSeqNonzerostructure_MPIAIJ,
       0,
       0,
       0,
       0,
       0
};
2704 
2705 /* ----------------------------------------------------------------------------------------*/
2706 
2707 EXTERN_C_BEGIN
2708 #undef __FUNCT__
2709 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2710 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2711 {
2712   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2713   PetscErrorCode ierr;
2714 
2715   PetscFunctionBegin;
2716   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2717   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2718   PetscFunctionReturn(0);
2719 }
2720 EXTERN_C_END
2721 
2722 EXTERN_C_BEGIN
2723 #undef __FUNCT__
2724 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2725 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2726 {
2727   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2728   PetscErrorCode ierr;
2729 
2730   PetscFunctionBegin;
2731   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2732   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2733   PetscFunctionReturn(0);
2734 }
2735 EXTERN_C_END
2736 
2737 #include "petscpc.h"
2738 EXTERN_C_BEGIN
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2741 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2742 {
2743   Mat_MPIAIJ     *b;
2744   PetscErrorCode ierr;
2745   PetscInt       i;
2746 
2747   PetscFunctionBegin;
2748   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2749   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2750   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2751   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2752 
2753   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2754   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2755   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2756   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2757   if (d_nnz) {
2758     for (i=0; i<B->rmap->n; i++) {
2759       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2760     }
2761   }
2762   if (o_nnz) {
2763     for (i=0; i<B->rmap->n; i++) {
2764       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2765     }
2766   }
2767   b = (Mat_MPIAIJ*)B->data;
2768 
2769   if (!B->preallocated) {
2770     /* Explicitly create 2 MATSEQAIJ matrices. */
2771     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2772     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2773     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2774     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2775     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2776     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2777     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2778     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2779   }
2780 
2781   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2782   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2783   B->preallocated = PETSC_TRUE;
2784   PetscFunctionReturn(0);
2785 }
2786 EXTERN_C_END
2787 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
    MatDuplicate_MPIAIJ - Creates a new MPIAIJ matrix with the same layout
  as matin: duplicates the diagonal (A) and off-diagonal (B) blocks (values
  copied or not according to cpvalues), deep-copies colmap and garray, and
  copies the local work vector and communication scatter.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the whole function table so any per-object overrides carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factor       = matin->factor;
  mat->rmap->bs      = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;     /* the duplicate is marked assembled unconditionally */
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  a->donotstash     = oldmat->donotstash;
  a->roworiented    = oldmat->roworiented;
  /* MatGetRow() scratch space is not copied; it is rebuilt on demand */
  a->rowindices     = 0;
  a->rowvalues      = 0;
  a->getrowactive   = PETSC_FALSE;

  ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
  /* deep-copy the column map (hash table or flat array depending on build) */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* deep-copy garray (one entry per column of the off-diagonal block B) */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  /* duplicate the two sequential blocks, honoring cpvalues */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2851 
2852 #include "petscsys.h"
2853 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
    MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format from the
  viewer into a new parallel matrix of the given type.

    Process 0 reads the file (header, row lengths, column indices, values)
  and ships each other process its slice with point-to-point messages; every
  process then preallocates and fills its local rows.  Rows are distributed
  as M/size plus one extra row for the first M%size ranks.
*/
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only process 0 touches the file */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  /* broadcast global dimensions (header[3], the nonzero count, is unused here) */
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax       = rowners[1];
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  /* prefix-sum the gathered counts into row ownership ranges */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
      ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    /* NOTE(review): CHKERRQ here tests the stale ierr from an earlier call;
       PetscMPIIntCast returns the cast value directly — confirm intent */
    mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr);
    ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz     = procsnz[i];
      ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      mpicnt = PetscMPIIntCast(nz);
      ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    /* NOTE(review): CHKERRQ tests stale ierr here too — see above */
    mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr);
    ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* sanity-check the received length against what the row lengths promised */
    ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
    if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
    else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
    else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix; ourlens temporarily holds only the DIAGONAL counts */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore ourlens to the full per-row lengths for the insertion loops */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz     = procsnz[i];
      ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      mpicnt = PetscMPIIntCast(nz);
      ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    mpicnt = PetscMPIIntCast(nz);
    ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    /* sanity-check the received length */
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
    if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
    else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
    else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
3057 
3058 #undef __FUNCT__
3059 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3060 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3061 {
3062   PetscErrorCode ierr;
3063   IS             iscol_local;
3064   PetscInt       csize;
3065 
3066   PetscFunctionBegin;
3067   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3068   if (call == MAT_REUSE_MATRIX) {
3069     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3070     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3071   } else {
3072     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3073   }
3074   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3075   if (call == MAT_INITIAL_MATRIX) {
3076     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3077     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3078   }
3079   PetscFunctionReturn(0);
3080 }
3081 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Step 1: extract the requested rows/columns into a sequential matrix Mreuse
     on each process (the first of the two copies mentioned above). */
  if (call ==  MAT_REUSE_MATRIX) {
    /* recover the sequential submatrix stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    /* free only the one-element array holder returned by MatGetSubMatrices(), not the matrix */
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
	nlocal = m;
      } else {
        /* spread the n columns over the processes as evenly as possible */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of the local column counts gives this process's column ownership range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    /* single allocation: dlens occupies the first m entries, olens the next m */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      /* jj walks across all rows' column indices in place; do not reset it per row */
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* Step 2: copy the sequential submatrix row by row into the parallel result
     (the second copy). cwork/vwork point at each row's slice of jj/aa. */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    /* composing takes a reference, so drop ours; Mreuse now lives as long as M */
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
3210 
3211 EXTERN_C_BEGIN
3212 #undef __FUNCT__
3213 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3214 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3215 {
3216   PetscInt       m,cstart, cend,j,nnz,i,d;
3217   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3218   const PetscInt *JJ;
3219   PetscScalar    *values;
3220   PetscErrorCode ierr;
3221 
3222   PetscFunctionBegin;
3223   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3224 
3225   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3226   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3227   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3228   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3229   m      = B->rmap->n;
3230   cstart = B->cmap->rstart;
3231   cend   = B->cmap->rend;
3232   rstart = B->rmap->rstart;
3233 
3234   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3235   o_nnz = d_nnz + m;
3236 
3237 #if defined(PETSC_USE_DEBUGGING)
3238   for (i=0; i<m; i++) {
3239     nnz     = Ii[i+1]- Ii[i];
3240     JJ      = J + Ii[i];
3241     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3242     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3243     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3244     for (j=1; j<nnz; j++) {
3245       if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3246     }
3247   }
3248 #endif
3249 
3250   for (i=0; i<m; i++) {
3251     nnz     = Ii[i+1]- Ii[i];
3252     JJ      = J + Ii[i];
3253     nnz_max = PetscMax(nnz_max,nnz);
3254     for (j=0; j<nnz; j++) {
3255       if (*JJ >= cstart) break;
3256       JJ++;
3257     }
3258     d = 0;
3259     for (; j<nnz; j++) {
3260       if (*JJ++ >= cend) break;
3261       d++;
3262     }
3263     d_nnz[i] = d;
3264     o_nnz[i] = nnz - d;
3265   }
3266   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3267   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3268 
3269   if (v) values = (PetscScalar*)v;
3270   else {
3271     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3272     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3273   }
3274 
3275   for (i=0; i<m; i++) {
3276     ii   = i + rstart;
3277     nnz  = Ii[i+1]- Ii[i];
3278     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3279   }
3280   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3281   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3282 
3283   if (!v) {
3284     ierr = PetscFree(values);CHKERRQ(ierr);
3285   }
3286   PetscFunctionReturn(0);
3287 }
3288 EXTERN_C_END
3289 
3290 #undef __FUNCT__
3291 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3292 /*@
3293    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3294    (the default parallel PETSc format).
3295 
3296    Collective on MPI_Comm
3297 
3298    Input Parameters:
3299 +  B - the matrix
3300 .  i - the indices into j for the start of each local row (starts with zero)
3301 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3302 -  v - optional values in the matrix
3303 
3304    Level: developer
3305 
3306    Notes:
3307        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3308      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3309      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3310 
3311        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3312 
3313        The format which is used for the sparse matrix input, is equivalent to a
3314     row-major ordering.. i.e for the following matrix, the input data expected is
3315     as shown:
3316 
3317         1 0 0
3318         2 0 3     P0
3319        -------
3320         4 5 6     P1
3321 
3322      Process0 [P0]: rows_owned=[0,1]
3323         i =  {0,1,3}  [size = nrow+1  = 2+1]
3324         j =  {0,0,2}  [size = nz = 6]
3325         v =  {1,2,3}  [size = nz = 6]
3326 
3327      Process1 [P1]: rows_owned=[2]
3328         i =  {0,3}    [size = nrow+1  = 1+1]
3329         j =  {0,1,2}  [size = nz = 6]
3330         v =  {4,5,6}  [size = nz = 6]
3331 
3332       The column indices for each row MUST be sorted.
3333 
3334 .keywords: matrix, aij, compressed row, sparse, parallel
3335 
3336 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3337           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3338 @*/
3339 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3340 {
3341   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3342 
3343   PetscFunctionBegin;
3344   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3345   if (f) {
3346     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3347   }
3348   PetscFunctionReturn(0);
3349 }
3350 
3351 #undef __FUNCT__
3352 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3353 /*@C
3354    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3355    (the default parallel PETSc format).  For good matrix assembly performance
3356    the user should preallocate the matrix storage by setting the parameters
3357    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3358    performance can be increased by more than a factor of 50.
3359 
3360    Collective on MPI_Comm
3361 
3362    Input Parameters:
3363 +  A - the matrix
3364 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3365            (same value is used for all local rows)
3366 .  d_nnz - array containing the number of nonzeros in the various rows of the
3367            DIAGONAL portion of the local submatrix (possibly different for each row)
3368            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3369            The size of this array is equal to the number of local rows, i.e 'm'.
3370            You must leave room for the diagonal entry even if it is zero.
3371 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3372            submatrix (same value is used for all local rows).
3373 -  o_nnz - array containing the number of nonzeros in the various rows of the
3374            OFF-DIAGONAL portion of the local submatrix (possibly different for
3375            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3376            structure. The size of this array is equal to the number
3377            of local rows, i.e 'm'.
3378 
3379    If the *_nnz parameter is given then the *_nz parameter is ignored
3380 
3381    The AIJ format (also called the Yale sparse matrix format or
3382    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3383    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3384 
3385    The parallel matrix is partitioned such that the first m0 rows belong to
3386    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3387    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3388 
3389    The DIAGONAL portion of the local submatrix of a processor can be defined
3390    as the submatrix which is obtained by extraction the part corresponding
3391    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3392    first row that belongs to the processor, and r2 is the last row belonging
3393    to the this processor. This is a square mxm matrix. The remaining portion
3394    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3395 
3396    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3397 
3398    You can call MatGetInfo() to get information on how effective the preallocation was;
3399    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3400    You can also run with the option -info and look for messages with the string
3401    malloc in them to see if additional memory allocation was needed.
3402 
3403    Example usage:
3404 
3405    Consider the following 8x8 matrix with 34 non-zero values, that is
3406    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3407    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3408    as follows:
3409 
3410 .vb
3411             1  2  0  |  0  3  0  |  0  4
3412     Proc0   0  5  6  |  7  0  0  |  8  0
3413             9  0 10  | 11  0  0  | 12  0
3414     -------------------------------------
3415            13  0 14  | 15 16 17  |  0  0
3416     Proc1   0 18  0  | 19 20 21  |  0  0
3417             0  0  0  | 22 23  0  | 24  0
3418     -------------------------------------
3419     Proc2  25 26 27  |  0  0 28  | 29  0
3420            30  0  0  | 31 32 33  |  0 34
3421 .ve
3422 
3423    This can be represented as a collection of submatrices as:
3424 
3425 .vb
3426       A B C
3427       D E F
3428       G H I
3429 .ve
3430 
3431    Where the submatrices A,B,C are owned by proc0, D,E,F are
3432    owned by proc1, G,H,I are owned by proc2.
3433 
3434    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3435    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3436    The 'M','N' parameters are 8,8, and have the same values on all procs.
3437 
3438    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3439    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3440    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3441    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3442    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3444 
3445    When d_nz, o_nz parameters are specified, d_nz storage elements are
3446    allocated for every row of the local diagonal submatrix, and o_nz
3447    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
3449    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3450    In this case, the values of d_nz,o_nz are:
3451 .vb
3452      proc0 : dnz = 2, o_nz = 2
3453      proc1 : dnz = 3, o_nz = 2
3454      proc2 : dnz = 1, o_nz = 4
3455 .ve
3456    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3457    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
3459    34 values.
3460 
3461    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3463    In the above case the values for d_nnz,o_nnz are:
3464 .vb
3465      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3466      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3467      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3468 .ve
3469    Here the space allocated is sum of all the above values i.e 34, and
3470    hence pre-allocation is perfect.
3471 
3472    Level: intermediate
3473 
3474 .keywords: matrix, aij, compressed row, sparse, parallel
3475 
3476 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3477           MPIAIJ, MatGetInfo()
3478 @*/
3479 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3480 {
3481   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3482 
3483   PetscFunctionBegin;
3484   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3485   if (f) {
3486     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3487   }
3488   PetscFunctionReturn(0);
3489 }
3490 
3491 #undef __FUNCT__
3492 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3493 /*@
3494      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3495          CSR format the local rows.
3496 
3497    Collective on MPI_Comm
3498 
3499    Input Parameters:
3500 +  comm - MPI communicator
3501 .  m - number of local rows (Cannot be PETSC_DECIDE)
3502 .  n - This value should be the same as the local size used in creating the
3503        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3504        calculated if N is given) For square matrices n is almost always m.
3505 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3506 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3507 .   i - row indices
3508 .   j - column indices
3509 -   a - matrix values
3510 
3511    Output Parameter:
3512 .   mat - the matrix
3513 
3514    Level: intermediate
3515 
3516    Notes:
3517        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3518      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3519      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3520 
3521        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3522 
3523        The format which is used for the sparse matrix input, is equivalent to a
3524     row-major ordering.. i.e for the following matrix, the input data expected is
3525     as shown:
3526 
3527         1 0 0
3528         2 0 3     P0
3529        -------
3530         4 5 6     P1
3531 
3532      Process0 [P0]: rows_owned=[0,1]
3533         i =  {0,1,3}  [size = nrow+1  = 2+1]
3534         j =  {0,0,2}  [size = nz = 6]
3535         v =  {1,2,3}  [size = nz = 6]
3536 
3537      Process1 [P1]: rows_owned=[2]
3538         i =  {0,3}    [size = nrow+1  = 1+1]
3539         j =  {0,1,2}  [size = nz = 6]
3540         v =  {4,5,6}  [size = nz = 6]
3541 
3542 .keywords: matrix, aij, compressed row, sparse, parallel
3543 
3544 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3545           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3546 @*/
3547 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3548 {
3549   PetscErrorCode ierr;
3550 
3551  PetscFunctionBegin;
3552   if (i[0]) {
3553     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3554   }
3555   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3556   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3557   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3558   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3559   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3560   PetscFunctionReturn(0);
3561 }
3562 
3563 #undef __FUNCT__
3564 #define __FUNCT__ "MatCreateMPIAIJ"
3565 /*@C
3566    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3567    (the default parallel PETSc format).  For good matrix assembly performance
3568    the user should preallocate the matrix storage by setting the parameters
3569    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3570    performance can be increased by more than a factor of 50.
3571 
3572    Collective on MPI_Comm
3573 
3574    Input Parameters:
3575 +  comm - MPI communicator
3576 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3577            This value should be the same as the local size used in creating the
3578            y vector for the matrix-vector product y = Ax.
3579 .  n - This value should be the same as the local size used in creating the
3580        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3581        calculated if N is given) For square matrices n is almost always m.
3582 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3583 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3584 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3585            (same value is used for all local rows)
3586 .  d_nnz - array containing the number of nonzeros in the various rows of the
3587            DIAGONAL portion of the local submatrix (possibly different for each row)
3588            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3589            The size of this array is equal to the number of local rows, i.e 'm'.
3590            You must leave room for the diagonal entry even if it is zero.
3591 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3592            submatrix (same value is used for all local rows).
3593 -  o_nnz - array containing the number of nonzeros in the various rows of the
3594            OFF-DIAGONAL portion of the local submatrix (possibly different for
3595            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3596            structure. The size of this array is equal to the number
3597            of local rows, i.e 'm'.
3598 
3599    Output Parameter:
3600 .  A - the matrix
3601 
3602    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3604    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3605 
3606    Notes:
3607    If the *_nnz parameter is given then the *_nz parameter is ignored
3608 
3609    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3610    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3611    storage requirements for this matrix.
3612 
3613    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3614    processor than it must be used on all processors that share the object for
3615    that argument.
3616 
3617    The user MUST specify either the local or global matrix dimensions
3618    (possibly both).
3619 
3620    The parallel matrix is partitioned across processors such that the
3621    first m0 rows belong to process 0, the next m1 rows belong to
3622    process 1, the next m2 rows belong to process 2 etc.. where
3623    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3624    values corresponding to [m x N] submatrix.
3625 
3626    The columns are logically partitioned with the n0 columns belonging
3627    to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.
3629 
3630    The DIAGONAL portion of the local submatrix on any given processor
3631    is the submatrix corresponding to the rows and columns m,n
3632    corresponding to the given processor. i.e diagonal matrix on
3633    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3634    etc. The remaining portion of the local submatrix [m x (N-n)]
3635    constitute the OFF-DIAGONAL portion. The example below better
3636    illustrates this concept.
3637 
3638    For a square global matrix we define each processor's diagonal portion
3639    to be its local rows and the corresponding columns (a square submatrix);
3640    each processor's off-diagonal portion encompasses the remainder of the
3641    local matrix (a rectangular submatrix).
3642 
3643    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3644 
3645    When calling this routine with a single process communicator, a matrix of
3646    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3647    type of communicator, use the construction mechanism:
3648      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3649 
3650    By default, this format uses inodes (identical nodes) when possible.
3651    We search for consecutive rows with the same nonzero structure, thereby
3652    reusing matrix information to achieve increased efficiency.
3653 
3654    Options Database Keys:
3655 +  -mat_no_inode  - Do not use inodes
3656 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3657 -  -mat_aij_oneindex - Internally use indexing starting at 1
3658         rather than 0.  Note that when calling MatSetValues(),
3659         the user still MUST index entries starting at 0!
3660 
3661 
3662    Example usage:
3663 
3664    Consider the following 8x8 matrix with 34 non-zero values, that is
3665    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3666    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3667    as follows:
3668 
3669 .vb
3670             1  2  0  |  0  3  0  |  0  4
3671     Proc0   0  5  6  |  7  0  0  |  8  0
3672             9  0 10  | 11  0  0  | 12  0
3673     -------------------------------------
3674            13  0 14  | 15 16 17  |  0  0
3675     Proc1   0 18  0  | 19 20 21  |  0  0
3676             0  0  0  | 22 23  0  | 24  0
3677     -------------------------------------
3678     Proc2  25 26 27  |  0  0 28  | 29  0
3679            30  0  0  | 31 32 33  |  0 34
3680 .ve
3681 
3682    This can be represented as a collection of submatrices as:
3683 
3684 .vb
3685       A B C
3686       D E F
3687       G H I
3688 .ve
3689 
3690    Where the submatrices A,B,C are owned by proc0, D,E,F are
3691    owned by proc1, G,H,I are owned by proc2.
3692 
3693    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3694    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3695    The 'M','N' parameters are 8,8, and have the same values on all procs.
3696 
3697    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3698    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3699    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3700    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3701    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3703 
3704    When d_nz, o_nz parameters are specified, d_nz storage elements are
3705    allocated for every row of the local diagonal submatrix, and o_nz
3706    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3707    One way to choose d_nz and o_nz is to use the max nonzerors per local
3708    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3709    In this case, the values of d_nz,o_nz are:
3710 .vb
3711      proc0 : dnz = 2, o_nz = 2
3712      proc1 : dnz = 3, o_nz = 2
3713      proc2 : dnz = 1, o_nz = 4
3714 .ve
3715    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3716    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
3718    34 values.
3719 
3720    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3722    In the above case the values for d_nnz,o_nnz are:
3723 .vb
3724      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3725      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3726      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3727 .ve
3728    Here the space allocated is sum of all the above values i.e 34, and
3729    hence pre-allocation is perfect.
3730 
3731    Level: intermediate
3732 
3733 .keywords: matrix, aij, compressed row, sparse, parallel
3734 
3735 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3736           MPIAIJ, MatCreateMPIAIJWithArrays()
3737 @*/
3738 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3739 {
3740   PetscErrorCode ierr;
3741   PetscMPIInt    size;
3742 
3743   PetscFunctionBegin;
3744   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3745   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3746   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3747   if (size > 1) {
3748     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3749     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3750   } else {
3751     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3752     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3753   }
3754   PetscFunctionReturn(0);
3755 }
3756 
3757 #undef __FUNCT__
3758 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3759 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3760 {
3761   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3762 
3763   PetscFunctionBegin;
3764   *Ad     = a->A;
3765   *Ao     = a->B;
3766   *colmap = a->garray;
3767   PetscFunctionReturn(0);
3768 }
3769 
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
    Attaches a coloring to both sequential blocks (diagonal A and
    off-diagonal B) of an MPIAIJ matrix by translating the incoming
    coloring into per-block local colorings and delegating to
    MatSetColoring_SeqAIJ().
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the colors of ALL
       global columns, then pick out the ones B references via garray
       (garray[i] is the global column of B's local column i) */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* ownership of colors[] passes to ocoloring */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: map this process's global column
       range through A->mapping to index into the ghosted coloring array */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: same idea, but the global
       columns of B come from garray rather than a contiguous range */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}
3833 
#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forward ADIC-differentiated values into both sequential blocks
   (diagonal first, then off-diagonal) of the parallel matrix. */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(aij->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(aij->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
3848 
3849 #undef __FUNCT__
3850 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3851 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3852 {
3853   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3854   PetscErrorCode ierr;
3855 
3856   PetscFunctionBegin;
3857   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3858   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3859   PetscFunctionReturn(0);
3860 }
3861 
#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process); it is DESTROYED by this routine
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* prefix-sum the local row counts to get this process's first global row */
    ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* NOTE: MatGetRow_SeqAIJ is called directly, so inmat is assumed to be SeqAIJ */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* copy the local rows into the parallel matrix, shifted to global numbering */
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii    = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  /* the input matrix is consumed here */
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
3932 
3933 #undef __FUNCT__
3934 #define __FUNCT__ "MatFileSplit"
3935 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3936 {
3937   PetscErrorCode    ierr;
3938   PetscMPIInt       rank;
3939   PetscInt          m,N,i,rstart,nnz;
3940   size_t            len;
3941   const PetscInt    *indx;
3942   PetscViewer       out;
3943   char              *name;
3944   Mat               B;
3945   const PetscScalar *values;
3946 
3947   PetscFunctionBegin;
3948   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3949   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3950   /* Should this be the type of the diagonal block of A? */
3951   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3952   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3953   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3954   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3955   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3956   for (i=0;i<m;i++) {
3957     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3958     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3959     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3960   }
3961   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3962   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3963 
3964   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3965   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3966   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3967   sprintf(name,"%s.%d",outfile,rank);
3968   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3969   ierr = PetscFree(name);
3970   ierr = MatView(B,out);CHKERRQ(ierr);
3971   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3972   ierr = MatDestroy(B);CHKERRQ(ierr);
3973   PetscFunctionReturn(0);
3974 }
3975 
3976 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3977 #undef __FUNCT__
3978 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3979 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3980 {
3981   PetscErrorCode       ierr;
3982   Mat_Merge_SeqsToMPI  *merge;
3983   PetscContainer       container;
3984 
3985   PetscFunctionBegin;
3986   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3987   if (container) {
3988     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3989     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3990     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3991     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
4000 
4001     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4002     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4003   }
4004   ierr = PetscFree(merge);CHKERRQ(ierr);
4005 
4006   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4007   PetscFunctionReturn(0);
4008 }
4009 
4010 #include "../src/mat/utils/freespace.h"
4011 #include "petscbt.h"
4012 
4013 #undef __FUNCT__
4014 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4015 /*@C
4016       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4017                  matrices from each processor
4018 
4019     Collective on MPI_Comm
4020 
4021    Input Parameters:
4022 +    comm - the communicators the parallel matrix will live on
4023 .    seqmat - the input sequential matrices
4024 .    m - number of local rows (or PETSC_DECIDE)
4025 .    n - number of local columns (or PETSC_DECIDE)
4026 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4027 
4028    Output Parameter:
4029 .    mpimat - the parallel matrix generated
4030 
4031     Level: advanced
4032 
4033    Notes:
4034      The dimensions of the sequential matrix in each processor MUST be the same.
4035      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4036      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4037 @*/
4038 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4039 {
4040   PetscErrorCode       ierr;
4041   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4042   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4043   PetscMPIInt          size,rank,taga,*len_s;
4044   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4045   PetscInt             proc,m;
4046   PetscInt             **buf_ri,**buf_rj;
4047   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4048   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4049   MPI_Request          *s_waits,*r_waits;
4050   MPI_Status           *status;
4051   MatScalar            *aa=a->a;
4052   MatScalar            **abuf_r,*ba_i;
4053   Mat_Merge_SeqsToMPI  *merge;
4054   PetscContainer       container;
4055 
4056   PetscFunctionBegin;
4057   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4058 
4059   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4060   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4061 
4062   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4063   if (container) {
4064     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4065   }
4066   bi     = merge->bi;
4067   bj     = merge->bj;
4068   buf_ri = merge->buf_ri;
4069   buf_rj = merge->buf_rj;
4070 
4071   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4072   owners = merge->rowmap.range;
4073   len_s  = merge->len_s;
4074 
4075   /* send and recv matrix values */
4076   /*-----------------------------*/
4077   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4078   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4079 
4080   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4081   for (proc=0,k=0; proc<size; proc++){
4082     if (!len_s[proc]) continue;
4083     i = owners[proc];
4084     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4085     k++;
4086   }
4087 
4088   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4089   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4090   ierr = PetscFree(status);CHKERRQ(ierr);
4091 
4092   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4093   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4094 
4095   /* insert mat values of mpimat */
4096   /*----------------------------*/
4097   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4098   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4099   nextrow = buf_ri_k + merge->nrecv;
4100   nextai  = nextrow + merge->nrecv;
4101 
4102   for (k=0; k<merge->nrecv; k++){
4103     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4104     nrows = *(buf_ri_k[k]);
4105     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4106     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4107   }
4108 
4109   /* set values of ba */
4110   m = merge->rowmap.n;
4111   for (i=0; i<m; i++) {
4112     arow = owners[rank] + i;
4113     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4114     bnzi = bi[i+1] - bi[i];
4115     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4116 
4117     /* add local non-zero vals of this proc's seqmat into ba */
4118     anzi = ai[arow+1] - ai[arow];
4119     aj   = a->j + ai[arow];
4120     aa   = a->a + ai[arow];
4121     nextaj = 0;
4122     for (j=0; nextaj<anzi; j++){
4123       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4124         ba_i[j] += aa[nextaj++];
4125       }
4126     }
4127 
4128     /* add received vals into ba */
4129     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4130       /* i-th row */
4131       if (i == *nextrow[k]) {
4132         anzi = *(nextai[k]+1) - *nextai[k];
4133         aj   = buf_rj[k] + *(nextai[k]);
4134         aa   = abuf_r[k] + *(nextai[k]);
4135         nextaj = 0;
4136         for (j=0; nextaj<anzi; j++){
4137           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4138             ba_i[j] += aa[nextaj++];
4139           }
4140         }
4141         nextrow[k]++; nextai[k]++;
4142       }
4143     }
4144     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4145   }
4146   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4147   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4148 
4149   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4150   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4151   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4152   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4153   PetscFunctionReturn(0);
4154 }
4155 
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
/*
    Symbolic phase of MatMerge_SeqsToMPI(): determines the nonzero structure
    of the summed parallel matrix, preallocates it, and attaches a
    Mat_Merge_SeqsToMPI support structure (row map, send/recv lengths, and
    the received ij-structures) to the result so that
    MatMerge_SeqsToMPINumeric() can later fill in the values.
    The returned matrix is NOT assembled.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode       ierr;
  Mat                  B_mpi;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt             len,proc,*dnz,*onz;
  PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
  merge->rowmap.n = m;
  merge->rowmap.N = M;
  merge->rowmap.bs = 1;
  ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  /* m and M may have been PETSC_DECIDE; re-read the resolved values */
  m      = merge->rowmap.n;
  M      = merge->rowmap.N;
  owners = merge->rowmap.range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s  = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* only rows with at least one nonzero are encoded in the i-structure */
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1] = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list for merging sorted column indices */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = 0;
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0;i<m;i++) {
    bnzi   = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow   = owners[rank] + i;
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  /* flatten the accumulated free-space segments into the final bj array */
  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled     = PETSC_FALSE;
  B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi            = bi;
  merge->bj            = bj;
  merge->buf_ri        = buf_ri;
  merge->buf_rj        = buf_rj;
  merge->coi           = PETSC_NULL;
  merge->coj           = PETSC_NULL;
  merge->owners_co     = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4406 
4407 #undef __FUNCT__
4408 #define __FUNCT__ "MatMerge_SeqsToMPI"
4409 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4410 {
4411   PetscErrorCode   ierr;
4412 
4413   PetscFunctionBegin;
4414   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4415   if (scall == MAT_INITIAL_MATRIX){
4416     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4417   }
4418   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4419   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4420   PetscFunctionReturn(0);
4421 }
4422 
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
     MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

    Notes: Merges the diagonal and off-diagonal blocks of the MPIAIJ matrix
    into a single SeqAIJ matrix with global column numbering. In each row
    the off-diagonal columns smaller than cstart come first, then the
    diagonal-block columns, then the remaining off-diagonal columns, so the
    merged row stays sorted. MAT_REUSE_MATRIX assumes the sparsity pattern
    is unchanged and only refreshes the values.

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar       *aa=a->a,*ba=b->a,*cam;
  PetscScalar     *ca;
  PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* row i of the result holds all of A's and B's entries for that row */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: columns before the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: shift local columns to global numbering */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: columns after the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* same traversal as above, but only the values array is rewritten */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4517 
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS                isrowa,iscola;
  Mat               *aloc;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  /* default row set: all locally owned rows */
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  /* default column set: all columns with local nonzeros, in ascending global
     order - off-diagonal columns below the diagonal block (garray is sorted),
     then the diagonal block, then the remaining off-diagonal columns */
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  /* for reuse, MatGetSubMatrices expects the caller-provided array of matrices */
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  /* destroy only the index sets we created ourselves */
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4587 
4588 #undef __FUNCT__
4589 #define __FUNCT__ "MatGetBrowsOfAcols"
4590 /*@C
4591     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4592 
4593     Collective on Mat
4594 
4595    Input Parameters:
4596 +    A,B - the matrices in mpiaij format
4597 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4598 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4599 
4600    Output Parameter:
4601 +    rowb, colb - index sets of rows and columns of B to extract
4602 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4603 -    B_seq - the sequential matrix generated
4604 
4605     Level: developer
4606 
4607 @*/
4608 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4609 {
4610   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4611   PetscErrorCode    ierr;
4612   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4613   IS                isrowb,iscolb;
4614   Mat               *bseq;
4615 
4616   PetscFunctionBegin;
4617   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4618     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4619   }
4620   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4621 
4622   if (scall == MAT_INITIAL_MATRIX){
4623     start = A->cmap->rstart;
4624     cmap  = a->garray;
4625     nzA   = a->A->cmap->n;
4626     nzB   = a->B->cmap->n;
4627     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4628     ncols = 0;
4629     for (i=0; i<nzB; i++) {  /* row < local row index */
4630       if (cmap[i] < start) idx[ncols++] = cmap[i];
4631       else break;
4632     }
4633     imark = i;
4634     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4635     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4636     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4637     ierr = PetscFree(idx);CHKERRQ(ierr);
4638     *brstart = imark;
4639     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4640   } else {
4641     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4642     isrowb = *rowb; iscolb = *colb;
4643     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4644     bseq[0] = *B_seq;
4645   }
4646   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4647   *B_seq = bseq[0];
4648   ierr = PetscFree(bseq);CHKERRQ(ierr);
4649   if (!rowb){
4650     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4651   } else {
4652     *rowb = isrowb;
4653   }
4654   if (!colb){
4655     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4656   } else {
4657     *colb = iscolb;
4658   }
4659   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4660   PetscFunctionReturn(0);
4661 }
4662 
4663 #undef __FUNCT__
4664 #define __FUNCT__ "MatGetBrowsOfAoCols"
4665 /*@C
4666     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4667     of the OFF-DIAGONAL portion of local A
4668 
4669     Collective on Mat
4670 
4671    Input Parameters:
4672 +    A,B - the matrices in mpiaij format
4673 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4674 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4675 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4676 
4677    Output Parameter:
4678 +    B_oth - the sequential matrix generated
4679 
4680     Level: developer
4681 
4682 @*/
4683 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4684 {
4685   VecScatter_MPI_General *gen_to,*gen_from;
4686   PetscErrorCode         ierr;
4687   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4688   Mat_SeqAIJ             *b_oth;
4689   VecScatter             ctx=a->Mvctx;
4690   MPI_Comm               comm=((PetscObject)ctx)->comm;
4691   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4692   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4693   PetscScalar            *rvalues,*svalues;
4694   MatScalar              *b_otha,*bufa,*bufA;
4695   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4696   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4697   MPI_Status             *sstatus,rstatus;
4698   PetscMPIInt            jj;
4699   PetscInt               *cols,sbs,rbs;
4700   PetscScalar            *vals;
4701 
4702   PetscFunctionBegin;
4703   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4704     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4705   }
4706   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4707   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4708 
4709   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4710   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4711   rvalues  = gen_from->values; /* holds the length of receiving row */
4712   svalues  = gen_to->values;   /* holds the length of sending row */
4713   nrecvs   = gen_from->n;
4714   nsends   = gen_to->n;
4715 
4716   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4717   srow     = gen_to->indices;   /* local row index to be sent */
4718   sstarts  = gen_to->starts;
4719   sprocs   = gen_to->procs;
4720   sstatus  = gen_to->sstatus;
4721   sbs      = gen_to->bs;
4722   rstarts  = gen_from->starts;
4723   rprocs   = gen_from->procs;
4724   rbs      = gen_from->bs;
4725 
4726   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4727   if (scall == MAT_INITIAL_MATRIX){
4728     /* i-array */
4729     /*---------*/
4730     /*  post receives */
4731     for (i=0; i<nrecvs; i++){
4732       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4733       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4734       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4735     }
4736 
4737     /* pack the outgoing message */
4738     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4739     rstartsj = sstartsj + nsends +1;
4740     sstartsj[0] = 0;  rstartsj[0] = 0;
4741     len = 0; /* total length of j or a array to be sent */
4742     k = 0;
4743     for (i=0; i<nsends; i++){
4744       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4745       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4746       for (j=0; j<nrows; j++) {
4747         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4748         for (l=0; l<sbs; l++){
4749           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4750           rowlen[j*sbs+l] = ncols;
4751           len += ncols;
4752           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4753         }
4754         k++;
4755       }
4756       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4757       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4758     }
4759     /* recvs and sends of i-array are completed */
4760     i = nrecvs;
4761     while (i--) {
4762       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4763     }
4764     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4765 
4766     /* allocate buffers for sending j and a arrays */
4767     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4768     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4769 
4770     /* create i-array of B_oth */
4771     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4772     b_othi[0] = 0;
4773     len = 0; /* total length of j or a array to be received */
4774     k = 0;
4775     for (i=0; i<nrecvs; i++){
4776       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4777       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4778       for (j=0; j<nrows; j++) {
4779         b_othi[k+1] = b_othi[k] + rowlen[j];
4780         len += rowlen[j]; k++;
4781       }
4782       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4783     }
4784 
4785     /* allocate space for j and a arrrays of B_oth */
4786     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4787     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4788 
4789     /* j-array */
4790     /*---------*/
4791     /*  post receives of j-array */
4792     for (i=0; i<nrecvs; i++){
4793       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4794       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4795     }
4796 
4797     /* pack the outgoing message j-array */
4798     k = 0;
4799     for (i=0; i<nsends; i++){
4800       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4801       bufJ = bufj+sstartsj[i];
4802       for (j=0; j<nrows; j++) {
4803         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4804         for (ll=0; ll<sbs; ll++){
4805           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4806           for (l=0; l<ncols; l++){
4807             *bufJ++ = cols[l];
4808           }
4809           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4810         }
4811       }
4812       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4813     }
4814 
4815     /* recvs and sends of j-array are completed */
4816     i = nrecvs;
4817     while (i--) {
4818       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4819     }
4820     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4821   } else if (scall == MAT_REUSE_MATRIX){
4822     sstartsj = *startsj;
4823     rstartsj = sstartsj + nsends +1;
4824     bufa     = *bufa_ptr;
4825     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4826     b_otha   = b_oth->a;
4827   } else {
4828     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4829   }
4830 
4831   /* a-array */
4832   /*---------*/
4833   /*  post receives of a-array */
4834   for (i=0; i<nrecvs; i++){
4835     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4836     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4837   }
4838 
4839   /* pack the outgoing message a-array */
4840   k = 0;
4841   for (i=0; i<nsends; i++){
4842     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4843     bufA = bufa+sstartsj[i];
4844     for (j=0; j<nrows; j++) {
4845       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4846       for (ll=0; ll<sbs; ll++){
4847         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4848         for (l=0; l<ncols; l++){
4849           *bufA++ = vals[l];
4850         }
4851         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4852       }
4853     }
4854     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4855   }
4856   /* recvs and sends of a-array are completed */
4857   i = nrecvs;
4858   while (i--) {
4859     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4860   }
4861   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4862   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4863 
4864   if (scall == MAT_INITIAL_MATRIX){
4865     /* put together the new matrix */
4866     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4867 
4868     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4869     /* Since these are PETSc arrays, change flags to free them as necessary. */
4870     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4871     b_oth->free_a  = PETSC_TRUE;
4872     b_oth->free_ij = PETSC_TRUE;
4873     b_oth->nonew   = 0;
4874 
4875     ierr = PetscFree(bufj);CHKERRQ(ierr);
4876     if (!startsj || !bufa_ptr){
4877       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4878       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4879     } else {
4880       *startsj  = sstartsj;
4881       *bufa_ptr = bufa;
4882     }
4883   }
4884   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4885   PetscFunctionReturn(0);
4886 }
4887 
4888 #undef __FUNCT__
4889 #define __FUNCT__ "MatGetCommunicationStructs"
4890 /*@C
4891   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4892 
4893   Not Collective
4894 
4895   Input Parameters:
4896 . A - The matrix in mpiaij format
4897 
4898   Output Parameter:
4899 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4900 . colmap - A map from global column index to local index into lvec
4901 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4902 
4903   Level: developer
4904 
4905 @*/
4906 #if defined (PETSC_USE_CTABLE)
4907 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4908 #else
4909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4910 #endif
4911 {
4912   Mat_MPIAIJ *a;
4913 
4914   PetscFunctionBegin;
4915   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4916   PetscValidPointer(lvec, 2)
4917   PetscValidPointer(colmap, 3)
4918   PetscValidPointer(multScatter, 4)
4919   a = (Mat_MPIAIJ *) A->data;
4920   if (lvec) *lvec = a->lvec;
4921   if (colmap) *colmap = a->colmap;
4922   if (multScatter) *multScatter = a->Mvctx;
4923   PetscFunctionReturn(0);
4924 }
4925 
4926 EXTERN_C_BEGIN
4927 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4928 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4929 EXTERN_C_END
4930 
4931 #include "../src/mat/impls/dense/mpi/mpidense.h"
4932 
4933 #undef __FUNCT__
4934 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4935 /*
4936     Computes (B'*A')' since computing B*A directly is untenable
4937 
4938                n                       p                          p
4939         (              )       (              )         (                  )
4940       m (      A       )  *  n (       B      )   =   m (         C        )
4941         (              )       (              )         (                  )
4942 
4943 */
4944 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4945 {
4946   PetscErrorCode     ierr;
4947   Mat                At,Bt,Ct;
4948 
4949   PetscFunctionBegin;
4950   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4951   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4952   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4953   ierr = MatDestroy(At);CHKERRQ(ierr);
4954   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4955   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4956   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4957   PetscFunctionReturn(0);
4958 }
4959 
4960 #undef __FUNCT__
4961 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4962 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4963 {
4964   PetscErrorCode ierr;
4965   PetscInt       m=A->rmap->n,n=B->cmap->n;
4966   Mat            Cmat;
4967 
4968   PetscFunctionBegin;
4969   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4970   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4971   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4972   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4973   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4974   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4975   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4976   *C   = Cmat;
4977   PetscFunctionReturn(0);
4978 }
4979 
4980 /* ----------------------------------------------------------------*/
4981 #undef __FUNCT__
4982 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4983 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4984 {
4985   PetscErrorCode ierr;
4986 
4987   PetscFunctionBegin;
4988   if (scall == MAT_INITIAL_MATRIX){
4989     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4990   }
4991   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4992   PetscFunctionReturn(0);
4993 }
4994 
4995 EXTERN_C_BEGIN
4996 #if defined(PETSC_HAVE_MUMPS)
4997 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4998 #endif
4999 #if defined(PETSC_HAVE_PASTIX)
5000 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5001 #endif
5002 #if defined(PETSC_HAVE_SUPERLU_DIST)
5003 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5004 #endif
5005 #if defined(PETSC_HAVE_SPOOLES)
5006 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5007 #endif
5008 EXTERN_C_END
5009 
5010 /*MC
5011    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5012 
5013    Options Database Keys:
5014 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5015 
5016   Level: beginner
5017 
5018 .seealso: MatCreateMPIAIJ()
5019 M*/
5020 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
    MatCreate_MPIAIJ - Constructor registered for the MATMPIAIJ type.

    Fills in the operations table, allocates and initializes the Mat_MPIAIJ
    context, and composes the type-specific functions (external factorization
    packages, value store/retrieve, preallocation, scaling, conversions, and
    dense*aij products) on the object.  The matrix is not usable until one of
    the preallocation routines has been called.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  /* allocate the type-specific context and install the full operations table */
  ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs      = 1;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;      /* built lazily on first off-process MatSetValues after assembly */
  b->garray      = 0;      /* global indices of off-diagonal columns; built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* compose external direct-solver factorization routines (availability is
     configure-time dependent) */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
                                     "MatGetFactor_mpiaij_mumps",
                                     MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
					   "MatGetFactor_mpiaij_pastix",
					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  /* compose the remaining type-specific operations; the string names are the
     lookup keys used by PetscObjectQueryFunction and must not change */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
				     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
				     "MatIsTranspose_MPIAIJ",
				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
				     "MatMPIAIJSetPreallocation_MPIAIJ",
				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
				     "MatDiagonalScaleLocal_MPIAIJ",
				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                      MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                      MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                      MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                      MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
5120 
5121 #undef __FUNCT__
5122 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5123 /*@
5124      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5125          and "off-diagonal" part of the matrix in CSR format.
5126 
5127    Collective on MPI_Comm
5128 
5129    Input Parameters:
5130 +  comm - MPI communicator
5131 .  m - number of local rows (Cannot be PETSC_DECIDE)
5132 .  n - This value should be the same as the local size used in creating the
5133        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5134        calculated if N is given) For square matrices n is almost always m.
5135 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5136 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5137 .   i - row indices for "diagonal" portion of matrix
5138 .   j - column indices
5139 .   a - matrix values
5140 .   oi - row indices for "off-diagonal" portion of matrix
5141 .   oj - column indices
5142 -   oa - matrix values
5143 
5144    Output Parameter:
5145 .   mat - the matrix
5146 
5147    Level: advanced
5148 
5149    Notes:
5150        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5151 
5152        The i and j indices are 0 based
5153 
5154        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5155 
5156 
5157 .keywords: matrix, aij, compressed row, sparse, parallel
5158 
5159 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5160           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5161 @*/
5162 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5163 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5164 {
5165   PetscErrorCode ierr;
5166   Mat_MPIAIJ     *maij;
5167 
5168  PetscFunctionBegin;
5169   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5170   if (i[0]) {
5171     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5172   }
5173   if (oi[0]) {
5174     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5175   }
5176   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5177   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5178   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5179   maij = (Mat_MPIAIJ*) (*mat)->data;
5180   maij->donotstash     = PETSC_TRUE;
5181   (*mat)->preallocated = PETSC_TRUE;
5182 
5183   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5184   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5185   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5186   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5187 
5188   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5189   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5190 
5191   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5192   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5193   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5194   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5195 
5196   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5197   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5198   PetscFunctionReturn(0);
5199 }
5200 
5201 /*
5202     Special version for direct calls from Fortran
5203 */
5204 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5205 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5206 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5207 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5208 #endif
5209 
5210 /* Change these macros so can be used in void function */
5211 #undef CHKERRQ
5212 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5213 #undef SETERRQ2
5214 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5215 #undef SETERRQ
5216 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5217 
5218 EXTERN_C_BEGIN
5219 #undef __FUNCT__
5220 #define __FUNCT__ "matsetvaluesmpiaij_"
5221 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5222 {
5223   Mat             mat = *mmat;
5224   PetscInt        m = *mm, n = *mn;
5225   InsertMode      addv = *maddv;
5226   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5227   PetscScalar     value;
5228   PetscErrorCode  ierr;
5229 
5230   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5231   if (mat->insertmode == NOT_SET_VALUES) {
5232     mat->insertmode = addv;
5233   }
5234 #if defined(PETSC_USE_DEBUG)
5235   else if (mat->insertmode != addv) {
5236     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5237   }
5238 #endif
5239   {
5240   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5241   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5242   PetscTruth      roworiented = aij->roworiented;
5243 
5244   /* Some Variables required in the macro */
5245   Mat             A = aij->A;
5246   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5247   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5248   MatScalar       *aa = a->a;
5249   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5250   Mat             B = aij->B;
5251   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5252   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5253   MatScalar       *ba = b->a;
5254 
5255   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5256   PetscInt        nonew = a->nonew;
5257   MatScalar       *ap1,*ap2;
5258 
5259   PetscFunctionBegin;
5260   for (i=0; i<m; i++) {
5261     if (im[i] < 0) continue;
5262 #if defined(PETSC_USE_DEBUG)
5263     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5264 #endif
5265     if (im[i] >= rstart && im[i] < rend) {
5266       row      = im[i] - rstart;
5267       lastcol1 = -1;
5268       rp1      = aj + ai[row];
5269       ap1      = aa + ai[row];
5270       rmax1    = aimax[row];
5271       nrow1    = ailen[row];
5272       low1     = 0;
5273       high1    = nrow1;
5274       lastcol2 = -1;
5275       rp2      = bj + bi[row];
5276       ap2      = ba + bi[row];
5277       rmax2    = bimax[row];
5278       nrow2    = bilen[row];
5279       low2     = 0;
5280       high2    = nrow2;
5281 
5282       for (j=0; j<n; j++) {
5283         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5284         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5285         if (in[j] >= cstart && in[j] < cend){
5286           col = in[j] - cstart;
5287           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5288         } else if (in[j] < 0) continue;
5289 #if defined(PETSC_USE_DEBUG)
5290         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5291 #endif
5292         else {
5293           if (mat->was_assembled) {
5294             if (!aij->colmap) {
5295               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5296             }
5297 #if defined (PETSC_USE_CTABLE)
5298             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5299 	    col--;
5300 #else
5301             col = aij->colmap[in[j]] - 1;
5302 #endif
5303             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5304               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5305               col =  in[j];
5306               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5307               B = aij->B;
5308               b = (Mat_SeqAIJ*)B->data;
5309               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5310               rp2      = bj + bi[row];
5311               ap2      = ba + bi[row];
5312               rmax2    = bimax[row];
5313               nrow2    = bilen[row];
5314               low2     = 0;
5315               high2    = nrow2;
5316               bm       = aij->B->rmap->n;
5317               ba = b->a;
5318             }
5319           } else col = in[j];
5320           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5321         }
5322       }
5323     } else {
5324       if (!aij->donotstash) {
5325         if (roworiented) {
5326           if (ignorezeroentries && v[i*n] == 0.0) continue;
5327           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5328         } else {
5329           if (ignorezeroentries && v[i] == 0.0) continue;
5330           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5331         }
5332       }
5333     }
5334   }}
5335   PetscFunctionReturnVoid();
5336 }
5337 EXTERN_C_END
5338 
5339