xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ee1cef2c4f92b4b45b8aa3ceac59cb944062fab9)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "../src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
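/*
    A minimal usage sketch (illustration only, not part of this file; the names
    gA, dA and the assembly step are hypothetical): a square MATSEQAIJ matrix held
    on process 0 is spread over the communicator, each process receiving m rows.

       Mat gA = PETSC_NULL,dA;
       if (!rank) {
         ... assemble gA as a square MATSEQAIJ on process 0 ...
       }
       ierr = MatDistribute_MPIAIJ(comm,gA,m,MAT_INITIAL_MATRIX,&dA);CHKERRQ(ierr);
       ... later, when only the numerical values of gA have changed ...
       ierr = MatDistribute_MPIAIJ(comm,gA,m,MAT_REUSE_MATRIX,&dA);CHKERRQ(ierr);
*/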
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
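    /* turn the gathered per-process row counts in rowners[1..size] into a prefix sum,
       so that rowners[p] is the first global row owned by process p */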
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of off-diagonal entries in each row, and how many fall left of the diagonal block */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of off-diagonal entries in each row, and how many fall left of the diagonal block */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
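    /* dlens currently holds the full row lengths; subtract olens to get the diagonal-block
       counts for preallocation, then add olens back below so dlens can again serve as the
       number of values passed to MatSetValues() for each row */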
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0*/
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash-table cost; without it, it is not scalable (each processor
181 holds an order-N integer array) but access is fast.
182 */
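/*
   Hedged lookup sketch (illustration only; gcol and lcol are hypothetical names):
   translating a global column number gcol into the local column index used by the
   off-diagonal block, mirroring the lookups done in MatSetValues_MPIAIJ() below.

   #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
   #else
       lcol = aij->colmap[gcol] - 1;
   #endif

   A result of -1 means gcol does not appear in the off-diagonal part on this process.
*/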
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
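/*
   The two macros below insert a single value into one row of the local diagonal block (A)
   or the off-diagonal block (B): a short binary search narrows the search window, a linear
   scan locates the column, and if the column is not yet present the tail of the row is
   shifted up to make room (MatSeqXAIJReallocateAIJ() grows the storage when the row is full).
*/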
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* find size of row to the left of the diagonal part */
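  /* the off-diagonal block B stores local column indices into garray[], which is sorted by
     global column number, so the first l entries of this row of B lie to the left of the
     diagonal block */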
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some Variables required in the macro */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
415   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
456 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
457 {
458   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
459   PetscErrorCode ierr;
460   PetscInt       nstash,reallocs;
461   InsertMode     addv;
462 
463   PetscFunctionBegin;
464   if (aij->donotstash) {
465     PetscFunctionReturn(0);
466   }
467 
468   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
469   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
470   if (addv == (ADD_VALUES|INSERT_VALUES)) {
471     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
472   }
473   mat->insertmode = addv; /* in case this processor had no cache */
474 
475   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
476   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
477   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
478   PetscFunctionReturn(0);
479 }
480 
481 #undef __FUNCT__
482 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
483 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
484 {
485   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
486   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
487   PetscErrorCode ierr;
488   PetscMPIInt    n;
489   PetscInt       i,j,rstart,ncols,flg;
490   PetscInt       *row,*col;
491   PetscTruth     other_disassembled;
492   PetscScalar    *val;
493   InsertMode     addv = mat->insertmode;
494 
495   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
496   PetscFunctionBegin;
497   if (!aij->donotstash) {
498     while (1) {
499       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
500       if (!flg) break;
501 
502       for (i=0; i<n;) {
503         /* Now identify the consecutive vals belonging to the same row */
504         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
505         if (j < n) ncols = j-i;
506         else       ncols = n-i;
507         /* Now assemble all these values with a single function call */
508         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
509         i = j;
510       }
511     }
512     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
513   }
514   a->compressedrow.use     = PETSC_FALSE;
515   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
516   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
517 
518   /* determine if any processor has disassembled; if so we must
519      also disassemble ourselves so that we may reassemble. */
520   /*
521      if nonzero structure of submatrix B cannot change then we know that
522      no processor disassembled thus we can skip this stuff
523   */
524   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
525     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
526     if (mat->was_assembled && !other_disassembled) {
527       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
528     }
529   }
530   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
531     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
532   }
533   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
534   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
535   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
536   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
537 
538   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
539   aij->rowvalues = 0;
540 
541   /* used by MatAXPY() */
542   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
543   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
544 
545   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
546   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
547   PetscFunctionReturn(0);
548 }
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
552 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
553 {
554   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
555   PetscErrorCode ierr;
556 
557   PetscFunctionBegin;
558   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
559   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
560   PetscFunctionReturn(0);
561 }
562 
563 #undef __FUNCT__
564 #define __FUNCT__ "MatZeroRows_MPIAIJ"
565 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
566 {
567   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
568   PetscErrorCode ierr;
569   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
570   PetscInt       i,*owners = A->rmap->range;
571   PetscInt       *nprocs,j,idx,nsends,row;
572   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
573   PetscInt       *rvalues,count,base,slen,*source;
574   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
575   MPI_Comm       comm = ((PetscObject)A)->comm;
576   MPI_Request    *send_waits,*recv_waits;
577   MPI_Status     recv_status,*send_status;
578 #if defined(PETSC_DEBUG)
579   PetscTruth     found = PETSC_FALSE;
580 #endif
581 
582   PetscFunctionBegin;
583   /*  first count number of contributors to each processor */
584   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
585   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
586   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
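  /* nprocs[2*j] counts the rows destined for process j, nprocs[2*j+1] flags whether a
     message to process j is needed at all, and owner[i] records the destination of rows[i] */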
587   j = 0;
588   for (i=0; i<N; i++) {
589     if (lastidx > (idx = rows[i])) j = 0;
590     lastidx = idx;
591     for (; j<size; j++) {
592       if (idx >= owners[j] && idx < owners[j+1]) {
593         nprocs[2*j]++;
594         nprocs[2*j+1] = 1;
595         owner[i] = j;
596 #if defined(PETSC_DEBUG)
597         found = PETSC_TRUE;
598 #endif
599         break;
600       }
601     }
602 #if defined(PETSC_DEBUG)
603     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
604     found = PETSC_FALSE;
605 #endif
606   }
607   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
608 
609   /* inform other processors of number of messages and max length*/
610   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
611 
612   /* post receives:   */
613   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
614   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
615   for (i=0; i<nrecvs; i++) {
616     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
617   }
618 
619   /* do sends:
620       1) starts[i] gives the starting index in svalues for stuff going to
621          the ith processor
622   */
623   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
624   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
625   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
626   starts[0] = 0;
627   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
628   for (i=0; i<N; i++) {
629     svalues[starts[owner[i]]++] = rows[i];
630   }
631 
632   starts[0] = 0;
633   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
634   count = 0;
635   for (i=0; i<size; i++) {
636     if (nprocs[2*i+1]) {
637       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
638     }
639   }
640   ierr = PetscFree(starts);CHKERRQ(ierr);
641 
642   base = owners[rank];
643 
644   /*  wait on receives */
645   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
646   source = lens + nrecvs;
647   count  = nrecvs; slen = 0;
648   while (count) {
649     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
650     /* unpack receives into our local space */
651     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
652     source[imdex]  = recv_status.MPI_SOURCE;
653     lens[imdex]    = n;
654     slen          += n;
655     count--;
656   }
657   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
658 
659   /* move the data into the send scatter */
660   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
661   count = 0;
662   for (i=0; i<nrecvs; i++) {
663     values = rvalues + i*nmax;
664     for (j=0; j<lens[i]; j++) {
665       lrows[count++] = values[j] - base;
666     }
667   }
668   ierr = PetscFree(rvalues);CHKERRQ(ierr);
669   ierr = PetscFree(lens);CHKERRQ(ierr);
670   ierr = PetscFree(owner);CHKERRQ(ierr);
671   ierr = PetscFree(nprocs);CHKERRQ(ierr);
672 
673   /* actually zap the local rows */
674   /*
675         Zero the required rows. If the "diagonal block" of the matrix
676      is square and the user wishes to set the diagonal, we use separate
677      code so that MatSetValues() is not called for each diagonal entry (which would
678      allocate new memory each time, causing many mallocs and slowing things down).
679 
680        Contributed by: Matthew Knepley
681   */
682   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
683   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
684   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
685     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
686   } else if (diag != 0.0) {
687     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
688     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
689       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
690 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
691     }
692     for (i = 0; i < slen; i++) {
693       row  = lrows[i] + rstart;
694       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
695     }
696     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
697     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
698   } else {
699     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
700   }
701   ierr = PetscFree(lrows);CHKERRQ(ierr);
702 
703   /* wait on sends */
704   if (nsends) {
705     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
706     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
707     ierr = PetscFree(send_status);CHKERRQ(ierr);
708   }
709   ierr = PetscFree(send_waits);CHKERRQ(ierr);
710   ierr = PetscFree(svalues);CHKERRQ(ierr);
711 
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatMult_MPIAIJ"
717 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
718 {
719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
720   PetscErrorCode ierr;
721   PetscInt       nt;
722 
723   PetscFunctionBegin;
724   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
725   if (nt != A->cmap->n) {
726     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
727   }
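  /* start communicating the ghost values needed by the off-diagonal block, overlap that
     with the purely local product A*x, then add in B times the received ghost values */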
728   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
729   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
730   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
731   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
732   PetscFunctionReturn(0);
733 }
734 
735 #undef __FUNCT__
736 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
737 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
738 {
739   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
740   PetscErrorCode ierr;
741 
742   PetscFunctionBegin;
743   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
744   PetscFunctionReturn(0);
745 }
746 
747 #undef __FUNCT__
748 #define __FUNCT__ "MatMultAdd_MPIAIJ"
749 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
750 {
751   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
752   PetscErrorCode ierr;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
756   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
757   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
758   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
764 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
765 {
766   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
767   PetscErrorCode ierr;
768   PetscTruth     merged;
769 
770   PetscFunctionBegin;
771   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
772   /* do nondiagonal part */
773   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
774   if (!merged) {
775     /* send it on its way */
776     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
777     /* do local part */
778     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
779     /* receive remote parts: note this assumes the values are not actually */
780     /* added into yy until the next line */
781     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
782   } else {
783     /* do local part */
784     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
785     /* send it on its way */
786     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
787     /* values actually were received in the Begin() but we need to call this nop */
788     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
789   }
790   PetscFunctionReturn(0);
791 }
792 
793 EXTERN_C_BEGIN
794 #undef __FUNCT__
795 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
796 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
797 {
798   MPI_Comm       comm;
799   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
800   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
801   IS             Me,Notme;
802   PetscErrorCode ierr;
803   PetscInt       M,N,first,last,*notme,i;
804   PetscMPIInt    size;
805 
806   PetscFunctionBegin;
807 
808   /* Easy test: symmetric diagonal block */
809   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
810   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
811   if (!*f) PetscFunctionReturn(0);
812   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
813   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
814   if (size == 1) PetscFunctionReturn(0);
815 
816   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
817   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
818   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
819   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
820   for (i=0; i<first; i++) notme[i] = i;
821   for (i=last; i<M; i++) notme[i-last+first] = i;
822   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
823   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
824   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
825   Aoff = Aoffs[0];
826   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
827   Boff = Boffs[0];
828   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
829   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
830   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
831   ierr = ISDestroy(Me);CHKERRQ(ierr);
832   ierr = ISDestroy(Notme);CHKERRQ(ierr);
833 
834   PetscFunctionReturn(0);
835 }
836 EXTERN_C_END
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
840 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
841 {
842   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
843   PetscErrorCode ierr;
844 
845   PetscFunctionBegin;
846   /* do nondiagonal part */
847   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
848   /* send it on its way */
849   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
850   /* do local part */
851   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
852   /* receive remote parts */
853   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
854   PetscFunctionReturn(0);
855 }
856 
857 /*
858   This only works correctly for square matrices where the subblock A->A is the
859    diagonal block
860 */
861 #undef __FUNCT__
862 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
863 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
864 {
865   PetscErrorCode ierr;
866   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
867 
868   PetscFunctionBegin;
869   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
870   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
871     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
872   }
873   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
874   PetscFunctionReturn(0);
875 }
876 
877 #undef __FUNCT__
878 #define __FUNCT__ "MatScale_MPIAIJ"
879 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
880 {
881   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
882   PetscErrorCode ierr;
883 
884   PetscFunctionBegin;
885   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
886   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
887   PetscFunctionReturn(0);
888 }
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatDestroy_MPIAIJ"
892 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
893 {
894   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
895   PetscErrorCode ierr;
896 
897   PetscFunctionBegin;
898 #if defined(PETSC_USE_LOG)
899   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
900 #endif
901   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
902   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
903   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
904   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
905 #if defined (PETSC_USE_CTABLE)
906   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
907 #else
908   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
909 #endif
910   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
911   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
912   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
913   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
914   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
915   ierr = PetscFree(aij);CHKERRQ(ierr);
916 
917   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
922   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
923   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
924   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatView_MPIAIJ_Binary"
930 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
931 {
932   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
933   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
934   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
935   PetscErrorCode    ierr;
936   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
937   int               fd;
938   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
939   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
940   PetscScalar       *column_values;
941 
942   PetscFunctionBegin;
943   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
944   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
945   nz   = A->nz + B->nz;
946   if (!rank) {
947     header[0] = MAT_FILE_COOKIE;
948     header[1] = mat->rmap->N;
949     header[2] = mat->cmap->N;
950     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
951     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
952     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
953     /* get largest number of rows any processor has */
954     rlen = mat->rmap->n;
955     range = mat->rmap->range;
956     for (i=1; i<size; i++) {
957       rlen = PetscMax(rlen,range[i+1] - range[i]);
958     }
959   } else {
960     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
961     rlen = mat->rmap->n;
962   }
963 
964   /* load up the local row counts */
965   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
966   for (i=0; i<mat->rmap->n; i++) {
967     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
968   }
969 
970   /* store the row lengths to the file */
971   if (!rank) {
972     MPI_Status status;
973     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
974     for (i=1; i<size; i++) {
975       rlen = range[i+1] - range[i];
976       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
977       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
978     }
979   } else {
980     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
981   }
982   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
983 
984   /* load up the local column indices */
985     nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
986   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
987   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
988   cnt  = 0;
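  /* the columns of B are local indices into the sorted garray[], so emitting B's entries that
     fall left of the diagonal block, then A's entries shifted by cstart, then B's remaining
     entries reproduces each row's global column indices in ascending order */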
989   for (i=0; i<mat->rmap->n; i++) {
990     for (j=B->i[i]; j<B->i[i+1]; j++) {
991       if ( (col = garray[B->j[j]]) > cstart) break;
992       column_indices[cnt++] = col;
993     }
994     for (k=A->i[i]; k<A->i[i+1]; k++) {
995       column_indices[cnt++] = A->j[k] + cstart;
996     }
997     for (; j<B->i[i+1]; j++) {
998       column_indices[cnt++] = garray[B->j[j]];
999     }
1000   }
1001   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1002 
1003   /* store the column indices to the file */
1004   if (!rank) {
1005     MPI_Status status;
1006     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1007     for (i=1; i<size; i++) {
1008       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1009       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1010       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1011       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1012     }
1013   } else {
1014     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1015     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1016   }
1017   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1018 
1019   /* load up the local column values */
1020   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1021   cnt  = 0;
1022   for (i=0; i<mat->rmap->n; i++) {
1023     for (j=B->i[i]; j<B->i[i+1]; j++) {
1024       if ( garray[B->j[j]] > cstart) break;
1025       column_values[cnt++] = B->a[j];
1026     }
1027     for (k=A->i[i]; k<A->i[i+1]; k++) {
1028       column_values[cnt++] = A->a[k];
1029     }
1030     for (; j<B->i[i+1]; j++) {
1031       column_values[cnt++] = B->a[j];
1032     }
1033   }
1034   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1035 
1036   /* store the column values to the file */
1037   if (!rank) {
1038     MPI_Status status;
1039     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1040     for (i=1; i<size; i++) {
1041       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1042       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1043       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1044       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1045     }
1046   } else {
1047     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1048     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1049   }
1050   ierr = PetscFree(column_values);CHKERRQ(ierr);
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1056 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1057 {
1058   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1059   PetscErrorCode    ierr;
1060   PetscMPIInt       rank = aij->rank,size = aij->size;
1061   PetscTruth        isdraw,iascii,isbinary;
1062   PetscViewer       sviewer;
1063   PetscViewerFormat format;
1064 
1065   PetscFunctionBegin;
1066   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1067   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1068   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1069   if (iascii) {
1070     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1071     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1072       MatInfo    info;
1073       PetscTruth inodes;
1074 
1075       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1076       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1077       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1078       if (!inodes) {
1079         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1080 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1083 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1084       }
1085       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1086       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1087       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1088       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1089       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1090       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1091       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1092       PetscFunctionReturn(0);
1093     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1094       PetscInt   inodecount,inodelimit,*inodes;
1095       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1096       if (inodes) {
1097         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1098       } else {
1099         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1100       }
1101       PetscFunctionReturn(0);
1102     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1103       PetscFunctionReturn(0);
1104     }
1105   } else if (isbinary) {
1106     if (size == 1) {
1107       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1108       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1109     } else {
1110       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1111     }
1112     PetscFunctionReturn(0);
1113   } else if (isdraw) {
1114     PetscDraw  draw;
1115     PetscTruth isnull;
1116     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1117     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1118   }
1119 
1120   if (size == 1) {
1121     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1122     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1123   } else {
1124     /* assemble the entire matrix onto first processor. */
1125     Mat         A;
1126     Mat_SeqAIJ  *Aloc;
1127     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1128     MatScalar   *a;
1129 
1130     if (mat->rmap->N > 1024) {
1131       PetscTruth flg = PETSC_FALSE;
1132 
1133       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1134       if (!flg) {
1135         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1136       }
1137     }
1138 
1139     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1140     if (!rank) {
1141       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1142     } else {
1143       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1144     }
1145     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1146     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1147     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1148     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1149 
1150     /* copy over the A part */
1151     Aloc = (Mat_SeqAIJ*)aij->A->data;
1152     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1153     row = mat->rmap->rstart;
1154     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1155     for (i=0; i<m; i++) {
1156       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1157       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1158     }
1159     aj = Aloc->j;
1160     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1161 
1162     /* copy over the B part */
1163     Aloc = (Mat_SeqAIJ*)aij->B->data;
1164     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1165     row  = mat->rmap->rstart;
1166     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1167     ct   = cols;
1168     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1169     for (i=0; i<m; i++) {
1170       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1171       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1172     }
1173     ierr = PetscFree(ct);CHKERRQ(ierr);
1174     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1175     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1176     /*
1177        Everyone has to make the call to draw the matrix since the graphics waits are
1178        synchronized across all processors that share the PetscDraw object
1179     */
1180     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1181     if (!rank) {
1182       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1183       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1184     }
1185     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1186     ierr = MatDestroy(A);CHKERRQ(ierr);
1187   }
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatView_MPIAIJ"
1193 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1194 {
1195   PetscErrorCode ierr;
1196   PetscTruth     iascii,isdraw,issocket,isbinary;
1197 
1198   PetscFunctionBegin;
1199   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1200   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1201   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1202   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1203   if (iascii || isdraw || isbinary || issocket) {
1204     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1205   } else {
1206     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1207   }
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatRelax_MPIAIJ"
1213 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1214 {
1215   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1216   PetscErrorCode ierr;
1217   Vec            bb1 = 0;
1218   PetscTruth     hasop;
1219 
1220   PetscFunctionBegin;
1221   if (its > 1) {ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);}
1222 
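  /* each local sweep below first refreshes the ghost values of xx, forms bb1 = bb - B*x
     using the off-diagonal block, and then runs the sequential SOR kernel of the diagonal
     block on that updated right-hand side */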
1223   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1224     if (flag & SOR_ZERO_INITIAL_GUESS) {
1225       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1226       its--;
1227     }
1228 
1229     while (its--) {
1230       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1231       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232 
1233       /* update rhs: bb1 = bb - B*x */
1234       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1235       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1236 
1237       /* local sweep */
1238       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1239     }
1240   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1241     if (flag & SOR_ZERO_INITIAL_GUESS) {
1242       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1243       its--;
1244     }
1245     while (its--) {
1246       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1247       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1248 
1249       /* update rhs: bb1 = bb - B*x */
1250       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1251       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1252 
1253       /* local sweep */
1254       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1255     }
1256   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1257     if (flag & SOR_ZERO_INITIAL_GUESS) {
1258       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1259       its--;
1260     }
1261     while (its--) {
1262       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1263       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264 
1265       /* update rhs: bb1 = bb - B*x */
1266       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1267       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1268 
1269       /* local sweep */
1270       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1271     }
1272   }  else if (flag & SOR_EISENSTAT) {
1273     Vec         xx1;
1274 
1275     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1276     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1277     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1278 
1279     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1280     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1281     if (!mat->diag) {
1282       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1283       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1284     }
1285     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1286     if (hasop) {
1287       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1288     } else {
1289       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1290     }
1291     ierr = VecWAXPY(bb1,-1.0,bb1,bb);CHKERRQ(ierr);
1292     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1293 
1294     /* local sweep */
1295     ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1296     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1297     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1298   } else {
1299     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1300   }
1301 
1302   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatPermute_MPIAIJ"
1308 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1309 {
1310   MPI_Comm       comm,pcomm;
1311   PetscInt       first,local_size,nrows;
1312   const PetscInt *rows;
1313   PetscMPIInt    size;
1314   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
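  /* Strategy: build a collective version of the row permutation, gather it onto all
     processes and invert it, take the slice owned by this process as the target row
     indices, invert the (sequential) column permutation, and finally extract the
     permuted matrix as a submatrix. */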
1318   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1319   /* make a collective version of 'rowp' */
1320   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1321   if (pcomm==comm) {
1322     crowp = rowp;
1323   } else {
1324     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1325     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1326     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1327     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1328   }
1329   /* collect the global row permutation and invert it */
1330   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1331   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1332   if (pcomm!=comm) {
1333     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1334   }
1335   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1336   /* get the local target indices */
1337   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1338   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1339   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1340   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1341   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1342   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1343   /* the column permutation is simpler:
1344      make a local version of 'colp' and invert it */

1345   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1346   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1347   if (size==1) {
1348     lcolp = colp;
1349   } else {
1350     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1351     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1352     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1353   }
1354   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1355   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1356   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1357   if (size>1) {
1358     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1359     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1360   }
1361   /* now we just get the submatrix */
1362   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1363   /* clean up */
1364   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1365   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1371 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1372 {
1373   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1374   Mat            A = mat->A,B = mat->B;
1375   PetscErrorCode ierr;
1376   PetscReal      isend[5],irecv[5];
1377 
1378   PetscFunctionBegin;
1379   info->block_size     = 1.0;
1380   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1382   isend[3] = info->memory;  isend[4] = info->mallocs;
1383   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1384   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1385   isend[3] += info->memory;  isend[4] += info->mallocs;
1386   if (flag == MAT_LOCAL) {
1387     info->nz_used      = isend[0];
1388     info->nz_allocated = isend[1];
1389     info->nz_unneeded  = isend[2];
1390     info->memory       = isend[3];
1391     info->mallocs      = isend[4];
1392   } else if (flag == MAT_GLOBAL_MAX) {
1393     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1394     info->nz_used      = irecv[0];
1395     info->nz_allocated = irecv[1];
1396     info->nz_unneeded  = irecv[2];
1397     info->memory       = irecv[3];
1398     info->mallocs      = irecv[4];
1399   } else if (flag == MAT_GLOBAL_SUM) {
1400     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1401     info->nz_used      = irecv[0];
1402     info->nz_allocated = irecv[1];
1403     info->nz_unneeded  = irecv[2];
1404     info->memory       = irecv[3];
1405     info->mallocs      = irecv[4];
1406   }
1407   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1408   info->fill_ratio_needed = 0;
1409   info->factor_mallocs    = 0;
1410 
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatSetOption_MPIAIJ"
1416 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1417 {
1418   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1419   PetscErrorCode ierr;
1420 
1421   PetscFunctionBegin;
1422   switch (op) {
1423   case MAT_NEW_NONZERO_LOCATIONS:
1424   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1425   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1426   case MAT_KEEP_NONZERO_PATTERN:
1427   case MAT_NEW_NONZERO_LOCATION_ERR:
1428   case MAT_USE_INODES:
1429   case MAT_IGNORE_ZERO_ENTRIES:
1430     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1431     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1432     break;
1433   case MAT_ROW_ORIENTED:
1434     a->roworiented = flg;
1435     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1436     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1437     break;
1438   case MAT_NEW_DIAGONALS:
1439     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1440     break;
1441   case MAT_IGNORE_OFF_PROC_ENTRIES:
1442     a->donotstash = PETSC_TRUE;
1443     break;
1444   case MAT_SYMMETRIC:
1445     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1446     break;
1447   case MAT_STRUCTURALLY_SYMMETRIC:
1448   case MAT_HERMITIAN:
1449   case MAT_SYMMETRY_ETERNAL:
1450     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1451     break;
1452   default:
1453     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatGetRow_MPIAIJ"
1460 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1461 {
1462   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1463   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1464   PetscErrorCode ierr;
1465   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1466   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1467   PetscInt       *cmap,*idx_p;
1468 
1469   PetscFunctionBegin;
1470   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1471   mat->getrowactive = PETSC_TRUE;
1472 
1473   if (!mat->rowvalues && (idx || v)) {
1474     /*
1475         allocate enough space to hold information from the longest row.
1476     */
1477     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1478     PetscInt     max = 1,tmp;
1479     for (i=0; i<matin->rmap->n; i++) {
1480       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1481       if (max < tmp) { max = tmp; }
1482     }
1483     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1484     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1485   }
1486 
1487   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows may be accessed with MatGetRow()");
1488   lrow = row - rstart;
1489 
1490   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1491   if (!v)   {pvA = 0; pvB = 0;}
1492   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1493   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1494   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1495   nztot = nzA + nzB;
1496 
1497   cmap  = mat->garray;
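  /* cmap (= mat->garray) maps local column indices of the off-diagonal block B to
     global column numbers, which lets the A and B row fragments be merged below. */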
1498   if (v  || idx) {
1499     if (nztot) {
1500       /* Sort by increasing column numbers, assuming A and B already sorted */
1501       PetscInt imark = -1;
1502       if (v) {
1503         *v = v_p = mat->rowvalues;
1504         for (i=0; i<nzB; i++) {
1505           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1506           else break;
1507         }
1508         imark = i;
1509         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1510         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1511       }
1512       if (idx) {
1513         *idx = idx_p = mat->rowindices;
1514         if (imark > -1) {
1515           for (i=0; i<imark; i++) {
1516             idx_p[i] = cmap[cworkB[i]];
1517           }
1518         } else {
1519           for (i=0; i<nzB; i++) {
1520             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1521             else break;
1522           }
1523           imark = i;
1524         }
1525         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1526         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1527       }
1528     } else {
1529       if (idx) *idx = 0;
1530       if (v)   *v   = 0;
1531     }
1532   }
1533   *nz = nztot;
1534   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1535   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1541 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1542 {
1543   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1544 
1545   PetscFunctionBegin;
1546   if (!aij->getrowactive) {
1547     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1548   }
1549   aij->getrowactive = PETSC_FALSE;
1550   PetscFunctionReturn(0);
1551 }
1552 
1553 #undef __FUNCT__
1554 #define __FUNCT__ "MatNorm_MPIAIJ"
1555 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1556 {
1557   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1558   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1559   PetscErrorCode ierr;
1560   PetscInt       i,j,cstart = mat->cmap->rstart;
1561   PetscReal      sum = 0.0;
1562   MatScalar      *v;
1563 
1564   PetscFunctionBegin;
1565   if (aij->size == 1) {
1566     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1567   } else {
1568     if (type == NORM_FROBENIUS) {
1569       v = amat->a;
1570       for (i=0; i<amat->nz; i++) {
1571 #if defined(PETSC_USE_COMPLEX)
1572         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1573 #else
1574         sum += (*v)*(*v); v++;
1575 #endif
1576       }
1577       v = bmat->a;
1578       for (i=0; i<bmat->nz; i++) {
1579 #if defined(PETSC_USE_COMPLEX)
1580         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1581 #else
1582         sum += (*v)*(*v); v++;
1583 #endif
1584       }
1585       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1586       *norm = sqrt(*norm);
1587     } else if (type == NORM_1) { /* max column norm */
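      /* accumulate |a_ij| into a global-length array of column sums (diagonal-block
         columns shifted by cstart, off-diagonal columns mapped through garray), add
         the arrays across processes, and take the largest entry. */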
1588       PetscReal *tmp,*tmp2;
1589       PetscInt  *jj,*garray = aij->garray;
1590       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1591       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1592       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1593       *norm = 0.0;
1594       v = amat->a; jj = amat->j;
1595       for (j=0; j<amat->nz; j++) {
1596         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1597       }
1598       v = bmat->a; jj = bmat->j;
1599       for (j=0; j<bmat->nz; j++) {
1600         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1601       }
1602       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1603       for (j=0; j<mat->cmap->N; j++) {
1604         if (tmp2[j] > *norm) *norm = tmp2[j];
1605       }
1606       ierr = PetscFree(tmp);CHKERRQ(ierr);
1607       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1608     } else if (type == NORM_INFINITY) { /* max row norm */
1609       PetscReal ntemp = 0.0;
1610       for (j=0; j<aij->A->rmap->n; j++) {
1611         v = amat->a + amat->i[j];
1612         sum = 0.0;
1613         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1614           sum += PetscAbsScalar(*v); v++;
1615         }
1616         v = bmat->a + bmat->i[j];
1617         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1618           sum += PetscAbsScalar(*v); v++;
1619         }
1620         if (sum > ntemp) ntemp = sum;
1621       }
1622       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1623     } else {
1624       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1625     }
1626   }
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 #undef __FUNCT__
1631 #define __FUNCT__ "MatTranspose_MPIAIJ"
1632 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1633 {
1634   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1635   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1636   PetscErrorCode ierr;
1637   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1638   PetscInt       cstart=A->cmap->rstart,ncol;
1639   Mat            B;
1640   MatScalar      *array;
1641 
1642   PetscFunctionBegin;
1643   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1644 
1645   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1646   ai = Aloc->i; aj = Aloc->j;
1647   bi = Bloc->i; bj = Bloc->j;
1648   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1649     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1650     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1651     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1652     for (i=0; i<ai[ma]; i++){
1653       d_nnz[aj[i]] ++;
1654       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1655     }
1656 
1657     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1658     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1659     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1660     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1661     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1662   } else {
1663     B = *matout;
1664   }
1665 
1666   /* copy over the A part */
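  /* each local row i of A is inserted as column rstart+i of the transpose: the (now
     global) column indices of row i become the row indices passed to MatSetValues(). */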
1667   array = Aloc->a;
1668   row = A->rmap->rstart;
1669   for (i=0; i<ma; i++) {
1670     ncol = ai[i+1]-ai[i];
1671     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1672     row++; array += ncol; aj += ncol;
1673   }
1674   aj = Aloc->j;
1675   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1676 
1677   /* copy over the B part */
1678   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1679   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1680   array = Bloc->a;
1681   row = A->rmap->rstart;
1682   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1683   cols_tmp = cols;
1684   for (i=0; i<mb; i++) {
1685     ncol = bi[i+1]-bi[i];
1686     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1687     row++; array += ncol; cols_tmp += ncol;
1688   }
1689   ierr = PetscFree(cols);CHKERRQ(ierr);
1690 
1691   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1692   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1694     *matout = B;
1695   } else {
1696     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1697   }
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1703 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1704 {
1705   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1706   Mat            a = aij->A,b = aij->B;
1707   PetscErrorCode ierr;
1708   PetscInt       s1,s2,s3;
1709 
1710   PetscFunctionBegin;
1711   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1712   if (rr) {
1713     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1714     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1715     /* Overlap communication with computation. */
1716     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1717   }
1718   if (ll) {
1719     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1720     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1721     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1722   }
1723   /* scale  the diagonal block */
1724   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1725 
1726   if (rr) {
1727     /* Do a scatter end and then right scale the off-diagonal block */
1728     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1729     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1730   }
1731 
1732   PetscFunctionReturn(0);
1733 }
1734 
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1737 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1738 {
1739   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1740   PetscErrorCode ierr;
1741 
1742   PetscFunctionBegin;
1743   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1744   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1745   PetscFunctionReturn(0);
1746 }
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1749 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1750 {
1751   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1752   PetscErrorCode ierr;
1753 
1754   PetscFunctionBegin;
1755   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatEqual_MPIAIJ"
1761 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1762 {
1763   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1764   Mat            a,b,c,d;
1765   PetscTruth     flg;
1766   PetscErrorCode ierr;
1767 
1768   PetscFunctionBegin;
1769   a = matA->A; b = matA->B;
1770   c = matB->A; d = matB->B;
1771 
1772   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1773   if (flg) {
1774     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1775   }
1776   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatCopy_MPIAIJ"
1782 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1783 {
1784   PetscErrorCode ierr;
1785   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1786   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1787 
1788   PetscFunctionBegin;
1789   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1790   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1791     /* because of the column compression in the off-processor part of the matrix a->B,
1792        the number of columns in a->B and b->B may be different, hence we cannot call
1793        MatCopy() directly on the two parts. If need be, a more efficient copy than
1794        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1795        then copying the submatrices */
1796     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1797   } else {
1798     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1799     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1800   }
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1806 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1807 {
1808   PetscErrorCode ierr;
1809 
1810   PetscFunctionBegin;
1811   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 #include "petscblaslapack.h"
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatAXPY_MPIAIJ"
1818 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1819 {
1820   PetscErrorCode ierr;
1821   PetscInt       i;
1822   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1823   PetscBLASInt   bnz,one=1;
1824   Mat_SeqAIJ     *x,*y;
1825 
1826   PetscFunctionBegin;
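  /* SAME_NONZERO_PATTERN: apply a BLAS axpy directly to the value arrays of both the
     diagonal and off-diagonal blocks.  SUBSET_NONZERO_PATTERN: the diagonal blocks use
     MatAXPY_SeqAIJ(), while the off-diagonal blocks use a cached x-to-y index map (xtoy)
     built once per pattern.  Anything else falls back to MatAXPY_Basic(). */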
1827   if (str == SAME_NONZERO_PATTERN) {
1828     PetscScalar alpha = a;
1829     x = (Mat_SeqAIJ *)xx->A->data;
1830     y = (Mat_SeqAIJ *)yy->A->data;
1831     bnz = PetscBLASIntCast(x->nz);
1832     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1833     x = (Mat_SeqAIJ *)xx->B->data;
1834     y = (Mat_SeqAIJ *)yy->B->data;
1835     bnz = PetscBLASIntCast(x->nz);
1836     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1837   } else if (str == SUBSET_NONZERO_PATTERN) {
1838     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1839 
1840     x = (Mat_SeqAIJ *)xx->B->data;
1841     y = (Mat_SeqAIJ *)yy->B->data;
1842     if (y->xtoy && y->XtoY != xx->B) {
1843       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1844       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1845     }
1846     if (!y->xtoy) { /* get xtoy */
1847       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1848       y->XtoY = xx->B;
1849       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1850     }
1851     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1852   } else {
1853     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1854   }
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1859 
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatConjugate_MPIAIJ"
1862 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1863 {
1864 #if defined(PETSC_USE_COMPLEX)
1865   PetscErrorCode ierr;
1866   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1867 
1868   PetscFunctionBegin;
1869   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1870   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1871 #else
1872   PetscFunctionBegin;
1873 #endif
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatRealPart_MPIAIJ"
1879 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1880 {
1881   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1882   PetscErrorCode ierr;
1883 
1884   PetscFunctionBegin;
1885   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1886   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1887   PetscFunctionReturn(0);
1888 }
1889 
1890 #undef __FUNCT__
1891 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1892 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1893 {
1894   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1895   PetscErrorCode ierr;
1896 
1897   PetscFunctionBegin;
1898   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1899   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 #ifdef PETSC_HAVE_PBGL
1904 
1905 #include <boost/parallel/mpi/bsp_process_group.hpp>
1906 #include <boost/graph/distributed/ilu_default_graph.hpp>
1907 #include <boost/graph/distributed/ilu_0_block.hpp>
1908 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1909 #include <boost/graph/distributed/petsc/interface.hpp>
1910 #include <boost/multi_array.hpp>
1911 #include <boost/parallel/distributed_property_map.hpp>
1912 
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1915 /*
1916   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1917 */
1918 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1919 {
1920   namespace petsc = boost::distributed::petsc;
1921 
1922   namespace graph_dist = boost::graph::distributed;
1923   using boost::graph::distributed::ilu_default::process_group_type;
1924   using boost::graph::ilu_permuted;
1925 
1926   PetscTruth      row_identity, col_identity;
1927   PetscContainer  c;
1928   PetscInt        m, n, M, N;
1929   PetscErrorCode  ierr;
1930 
1931   PetscFunctionBegin;
1932   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1933   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1934   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1935   if (!row_identity || !col_identity) {
1936     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1937   }
1938 
1939   process_group_type pg;
1940   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1941   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1942   lgraph_type&   level_graph = *lgraph_p;
1943   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1944 
1945   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1946   ilu_permuted(level_graph);
1947 
1948   /* put together the new matrix */
1949   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1950   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1951   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1952   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1953   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1954   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1955   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1956 
1957   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1958   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1959   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1965 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1966 {
1967   PetscFunctionBegin;
1968   PetscFunctionReturn(0);
1969 }
1970 
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatSolve_MPIAIJ"
1973 /*
1974   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1975 */
1976 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1977 {
1978   namespace graph_dist = boost::graph::distributed;
1979 
1980   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1981   lgraph_type*   lgraph_p;
1982   PetscContainer c;
1983   PetscErrorCode ierr;
1984 
1985   PetscFunctionBegin;
1986   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1987   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1988   ierr = VecCopy(b, x);CHKERRQ(ierr);
1989 
1990   PetscScalar* array_x;
1991   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1992   PetscInt sx;
1993   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1994 
1995   PetscScalar* array_b;
1996   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1997   PetscInt sb;
1998   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1999 
2000   lgraph_type&   level_graph = *lgraph_p;
2001   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2002 
2003   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2004   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2005                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2006 
2007   typedef boost::iterator_property_map<array_ref_type::iterator,
2008                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2009   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2010                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2011 
2012   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2013 
2014   PetscFunctionReturn(0);
2015 }
2016 #endif
2017 
2018 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2019   PetscInt       nzlocal,nsends,nrecvs;
2020   PetscMPIInt    *send_rank;
2021   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
2022   PetscScalar    *sbuf_a,**rbuf_a;
2023   PetscErrorCode (*MatDestroy)(Mat);
2024 } Mat_Redundant;
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2028 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2029 {
2030   PetscErrorCode       ierr;
2031   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2032   PetscInt             i;
2033 
2034   PetscFunctionBegin;
2035   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2036   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2037   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2038   for (i=0; i<redund->nrecvs; i++){
2039     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2040     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2041   }
2042   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2043   ierr = PetscFree(redund);CHKERRQ(ierr);
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "MatDestroy_MatRedundant"
2049 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2050 {
2051   PetscErrorCode  ierr;
2052   PetscContainer  container;
2053   Mat_Redundant   *redund=PETSC_NULL;
2054 
2055   PetscFunctionBegin;
2056   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2057   if (container) {
2058     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2059   } else {
2060     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2061   }
2062   A->ops->destroy = redund->MatDestroy;
2063   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2064   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2065   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
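/*
    Builds a redundant copy of the whole matrix on each subcommunicator: every process
    packs its local rows (row pointers and column indices in sbuf_j, values in sbuf_a),
    exchanges the buffers with the members of its subcommunicator, and assembles the
    result with MatSetValues().  A Mat_Redundant container is attached to the new matrix
    so that MAT_REUSE_MATRIX only has to resend the numerical values.
*/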
2071 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2072 {
2073   PetscMPIInt    rank,size;
2074   MPI_Comm       comm=((PetscObject)mat)->comm;
2075   PetscErrorCode ierr;
2076   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2077   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2078   PetscInt       *rowrange=mat->rmap->range;
2079   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2080   Mat            A=aij->A,B=aij->B,C=*matredundant;
2081   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2082   PetscScalar    *sbuf_a;
2083   PetscInt       nzlocal=a->nz+b->nz;
2084   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2085   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2086   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2087   MatScalar      *aworkA,*aworkB;
2088   PetscScalar    *vals;
2089   PetscMPIInt    tag1,tag2,tag3,imdex;
2090   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2091                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2092   MPI_Status     recv_status,*send_status;
2093   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2094   PetscInt       **rbuf_j=PETSC_NULL;
2095   PetscScalar    **rbuf_a=PETSC_NULL;
2096   Mat_Redundant  *redund=PETSC_NULL;
2097   PetscContainer container;
2098 
2099   PetscFunctionBegin;
2100   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2101   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2102 
2103   if (reuse == MAT_REUSE_MATRIX) {
2104     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2105     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2106     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2107     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2108     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2109     if (container) {
2110       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2111     } else {
2112       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2113     }
2114     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2115 
2116     nsends    = redund->nsends;
2117     nrecvs    = redund->nrecvs;
2118     send_rank = redund->send_rank; recv_rank = send_rank + size;
2119     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2120     sbuf_j    = redund->sbuf_j;
2121     sbuf_a    = redund->sbuf_a;
2122     rbuf_j    = redund->rbuf_j;
2123     rbuf_a    = redund->rbuf_a;
2124   }
2125 
2126   if (reuse == MAT_INITIAL_MATRIX){
2127     PetscMPIInt  subrank,subsize;
2128     PetscInt     nleftover,np_subcomm;
2129     /* get the destination processors' id send_rank, nsends and nrecvs */
2130     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2131     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2132     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2133     recv_rank = send_rank + size;
2134     np_subcomm = size/nsubcomm;
2135     nleftover  = size - nsubcomm*np_subcomm;
2136     nsends = 0; nrecvs = 0;
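    /* roughly: each process exchanges rows with the processes that occupy the same
       subrank in the other subcommunicators; when size is not divisible by nsubcomm,
       the leftover processes additionally send their rows to the last member of each
       of the smaller subcommunicators so that every subcommunicator sees all rows. */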
2137     for (i=0; i<size; i++){ /* i=rank*/
2138       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2139         send_rank[nsends] = i; nsends++;
2140         recv_rank[nrecvs++] = i;
2141       }
2142     }
2143     if (rank >= size - nleftover){/* this proc is a leftover processor */
2144       i = size-nleftover-1;
2145       j = 0;
2146       while (j < nsubcomm - nleftover){
2147         send_rank[nsends++] = i;
2148         i--; j++;
2149       }
2150     }
2151 
2152     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2153       for (i=0; i<nleftover; i++){
2154         recv_rank[nrecvs++] = size-nleftover+i;
2155       }
2156     }
2157 
2158     /* allocate sbuf_j, sbuf_a */
2159     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2160     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2161     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2162   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2163 
2164   /* copy mat's local entries into the buffers */
2165   if (reuse == MAT_INITIAL_MATRIX){
2166     rownz_max = 0;
2167     rptr = sbuf_j;
2168     cols = sbuf_j + rend-rstart + 1;
2169     vals = sbuf_a;
2170     rptr[0] = 0;
2171     for (i=0; i<rend-rstart; i++){
2172       row = i + rstart;
2173       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2174       ncols  = nzA + nzB;
2175       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2176       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2177       /* load the column indices for this row into cols */
2178       lwrite = 0;
2179       for (l=0; l<nzB; l++) {
2180         if ((ctmp = bmap[cworkB[l]]) < cstart){
2181           vals[lwrite]   = aworkB[l];
2182           cols[lwrite++] = ctmp;
2183         }
2184       }
2185       for (l=0; l<nzA; l++){
2186         vals[lwrite]   = aworkA[l];
2187         cols[lwrite++] = cstart + cworkA[l];
2188       }
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) >= cend){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       vals += ncols;
2196       cols += ncols;
2197       rptr[i+1] = rptr[i] + ncols;
2198       if (rownz_max < ncols) rownz_max = ncols;
2199     }
2200     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2201   } else { /* only copy matrix values into sbuf_a */
2202     rptr = sbuf_j;
2203     vals = sbuf_a;
2204     rptr[0] = 0;
2205     for (i=0; i<rend-rstart; i++){
2206       row = i + rstart;
2207       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2208       ncols  = nzA + nzB;
2209       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2210       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2211       lwrite = 0;
2212       for (l=0; l<nzB; l++) {
2213         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2214       }
2215       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2216       for (l=0; l<nzB; l++) {
2217         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2218       }
2219       vals += ncols;
2220       rptr[i+1] = rptr[i] + ncols;
2221     }
2222   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2223 
2224   /* send nzlocal to others, and recv other's nzlocal */
2225   /*--------------------------------------------------*/
2226   if (reuse == MAT_INITIAL_MATRIX){
2227     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2228     s_waits2 = s_waits3 + nsends;
2229     s_waits1 = s_waits2 + nsends;
2230     r_waits1 = s_waits1 + nsends;
2231     r_waits2 = r_waits1 + nrecvs;
2232     r_waits3 = r_waits2 + nrecvs;
2233   } else {
2234     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2235     r_waits3 = s_waits3 + nsends;
2236   }
2237 
2238   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2239   if (reuse == MAT_INITIAL_MATRIX){
2240     /* get new tags to keep the communication clean */
2241     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2242     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2243     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2244     rbuf_nz = sbuf_nz + nsends;
2245 
2246     /* post receives of other's nzlocal */
2247     for (i=0; i<nrecvs; i++){
2248       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2249     }
2250     /* send nzlocal to others */
2251     for (i=0; i<nsends; i++){
2252       sbuf_nz[i] = nzlocal;
2253       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2254     }
2255     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2256     count = nrecvs;
2257     while (count) {
2258       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2259       recv_rank[imdex] = recv_status.MPI_SOURCE;
2260       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2261       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2262 
2263       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2264       rbuf_nz[imdex] += i + 2;
2265       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2266       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2267       count--;
2268     }
2269     /* wait on sends of nzlocal */
2270     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2271     /* send mat->i,j to others, and recv from other's */
2272     /*------------------------------------------------*/
2273     for (i=0; i<nsends; i++){
2274       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2275       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2276     }
2277     /* wait on receives of mat->i,j */
2278     /*------------------------------*/
2279     count = nrecvs;
2280     while (count) {
2281       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2282       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2283       count--;
2284     }
2285     /* wait on sends of mat->i,j */
2286     /*---------------------------*/
2287     if (nsends) {
2288       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2289     }
2290   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2291 
2292   /* post receives, send and receive mat->a */
2293   /*----------------------------------------*/
2294   for (imdex=0; imdex<nrecvs; imdex++) {
2295     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2296   }
2297   for (i=0; i<nsends; i++){
2298     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2299   }
2300   count = nrecvs;
2301   while (count) {
2302     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2303     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2304     count--;
2305   }
2306   if (nsends) {
2307     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2308   }
2309 
2310   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2311 
2312   /* create redundant matrix */
2313   /*-------------------------*/
2314   if (reuse == MAT_INITIAL_MATRIX){
2315     /* compute rownz_max for preallocation */
2316     for (imdex=0; imdex<nrecvs; imdex++){
2317       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2318       rptr = rbuf_j[imdex];
2319       for (i=0; i<j; i++){
2320         ncols = rptr[i+1] - rptr[i];
2321         if (rownz_max < ncols) rownz_max = ncols;
2322       }
2323     }
2324 
2325     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2326     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2327     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2328     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2329     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2330   } else {
2331     C = *matredundant;
2332   }
2333 
2334   /* insert local matrix entries */
2335   rptr = sbuf_j;
2336   cols = sbuf_j + rend-rstart + 1;
2337   vals = sbuf_a;
2338   for (i=0; i<rend-rstart; i++){
2339     row   = i + rstart;
2340     ncols = rptr[i+1] - rptr[i];
2341     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2342     vals += ncols;
2343     cols += ncols;
2344   }
2345   /* insert received matrix entries */
2346   for (imdex=0; imdex<nrecvs; imdex++){
2347     rstart = rowrange[recv_rank[imdex]];
2348     rend   = rowrange[recv_rank[imdex]+1];
2349     rptr = rbuf_j[imdex];
2350     cols = rbuf_j[imdex] + rend-rstart + 1;
2351     vals = rbuf_a[imdex];
2352     for (i=0; i<rend-rstart; i++){
2353       row   = i + rstart;
2354       ncols = rptr[i+1] - rptr[i];
2355       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2356       vals += ncols;
2357       cols += ncols;
2358     }
2359   }
2360   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2361   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2362   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2363   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2364   if (reuse == MAT_INITIAL_MATRIX){
2365     PetscContainer container;
2366     *matredundant = C;
2367     /* create a supporting struct and attach it to C for reuse */
2368     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2369     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2370     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2371     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2372     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2373 
2374     redund->nzlocal = nzlocal;
2375     redund->nsends  = nsends;
2376     redund->nrecvs  = nrecvs;
2377     redund->send_rank = send_rank;
2378     redund->sbuf_nz = sbuf_nz;
2379     redund->sbuf_j  = sbuf_j;
2380     redund->sbuf_a  = sbuf_a;
2381     redund->rbuf_j  = rbuf_j;
2382     redund->rbuf_a  = rbuf_a;
2383 
2384     redund->MatDestroy = C->ops->destroy;
2385     C->ops->destroy    = MatDestroy_MatRedundant;
2386   }
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2392 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2393 {
2394   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2395   PetscErrorCode ierr;
2396   PetscInt       i,*idxb = 0;
2397   PetscScalar    *va,*vb;
2398   Vec            vtmp;
2399 
2400   PetscFunctionBegin;
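  /* take the row-wise max of |a_ij| over the diagonal block, then over the off-diagonal
     block (into vtmp), and keep the larger of the two per row, translating off-diagonal
     column indices to global ones through garray. */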
2401   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2402   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2403   if (idx) {
2404     for (i=0; i<A->rmap->n; i++) {
2405       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2406     }
2407   }
2408 
2409   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2410   if (idx) {
2411     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2412   }
2413   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2414   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2415 
2416   for (i=0; i<A->rmap->n; i++){
2417     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2418       va[i] = vb[i];
2419       if (idx) idx[i] = a->garray[idxb[i]];
2420     }
2421   }
2422 
2423   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2424   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2425   if (idxb) {
2426     ierr = PetscFree(idxb);CHKERRQ(ierr);
2427   }
2428   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2434 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2435 {
2436   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2437   PetscErrorCode ierr;
2438   PetscInt       i,*idxb = 0;
2439   PetscScalar    *va,*vb;
2440   Vec            vtmp;
2441 
2442   PetscFunctionBegin;
2443   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2444   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2445   if (idx) {
2446     for (i=0; i<A->rmap->n; i++) {
2447       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2448     }
2449   }
2450 
2451   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2452   if (idx) {
2453     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2454   }
2455   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2456   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2457 
2458   for (i=0; i<A->rmap->n; i++){
2459     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2460       va[i] = vb[i];
2461       if (idx) idx[i] = a->garray[idxb[i]];
2462     }
2463   }
2464 
2465   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2466   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2467   if (idxb) {
2468     ierr = PetscFree(idxb);CHKERRQ(ierr);
2469   }
2470   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 #undef __FUNCT__
2475 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2476 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2477 {
2478   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2479   PetscInt       n      = A->rmap->n;
2480   PetscInt       cstart = A->cmap->rstart;
2481   PetscInt      *cmap   = mat->garray;
2482   PetscInt      *diagIdx, *offdiagIdx;
2483   Vec            diagV, offdiagV;
2484   PetscScalar   *a, *diagA, *offdiagA;
2485   PetscInt       r;
2486   PetscErrorCode ierr;
2487 
2488   PetscFunctionBegin;
2489   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2490   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2491   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2492   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2493   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2494   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2495   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2496   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2497   for(r = 0; r < n; ++r) {
2498     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2499       a[r]   = diagA[r];
2500       idx[r] = cstart + diagIdx[r];
2501     } else {
2502       a[r]   = offdiagA[r];
2503       idx[r] = cmap[offdiagIdx[r]];
2504     }
2505   }
2506   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2507   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2508   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2509   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2510   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2511   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2512   PetscFunctionReturn(0);
2513 }
2514 
2515 #undef __FUNCT__
2516 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2517 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2518 {
2519   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2520   PetscInt       n      = A->rmap->n;
2521   PetscInt       cstart = A->cmap->rstart;
2522   PetscInt      *cmap   = mat->garray;
2523   PetscInt      *diagIdx, *offdiagIdx;
2524   Vec            diagV, offdiagV;
2525   PetscScalar   *a, *diagA, *offdiagA;
2526   PetscInt       r;
2527   PetscErrorCode ierr;
2528 
2529   PetscFunctionBegin;
2530   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2531   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2532   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2533   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2534   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2535   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2536   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2537   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2538   for(r = 0; r < n; ++r) {
2539     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2540       a[r]   = diagA[r];
2541       idx[r] = cstart + diagIdx[r];
2542     } else {
2543       a[r]   = offdiagA[r];
2544       idx[r] = cmap[offdiagIdx[r]];
2545     }
2546   }
2547   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2548   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2549   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2550   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2551   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2552   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 #undef __FUNCT__
2557 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2558 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2559 {
2560   PetscErrorCode ierr;
2561 
2562   PetscFunctionBegin;
2563   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 /* -------------------------------------------------------------------*/
2568 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2569        MatGetRow_MPIAIJ,
2570        MatRestoreRow_MPIAIJ,
2571        MatMult_MPIAIJ,
2572 /* 4*/ MatMultAdd_MPIAIJ,
2573        MatMultTranspose_MPIAIJ,
2574        MatMultTransposeAdd_MPIAIJ,
2575 #ifdef PETSC_HAVE_PBGL
2576        MatSolve_MPIAIJ,
2577 #else
2578        0,
2579 #endif
2580        0,
2581        0,
2582 /*10*/ 0,
2583        0,
2584        0,
2585        MatRelax_MPIAIJ,
2586        MatTranspose_MPIAIJ,
2587 /*15*/ MatGetInfo_MPIAIJ,
2588        MatEqual_MPIAIJ,
2589        MatGetDiagonal_MPIAIJ,
2590        MatDiagonalScale_MPIAIJ,
2591        MatNorm_MPIAIJ,
2592 /*20*/ MatAssemblyBegin_MPIAIJ,
2593        MatAssemblyEnd_MPIAIJ,
2594        MatSetOption_MPIAIJ,
2595        MatZeroEntries_MPIAIJ,
2596 /*24*/ MatZeroRows_MPIAIJ,
2597        0,
2598 #ifdef PETSC_HAVE_PBGL
2599        0,
2600 #else
2601        0,
2602 #endif
2603        0,
2604        0,
2605 /*29*/ MatSetUpPreallocation_MPIAIJ,
2606 #ifdef PETSC_HAVE_PBGL
2607        0,
2608 #else
2609        0,
2610 #endif
2611        0,
2612        0,
2613        0,
2614 /*34*/ MatDuplicate_MPIAIJ,
2615        0,
2616        0,
2617        0,
2618        0,
2619 /*39*/ MatAXPY_MPIAIJ,
2620        MatGetSubMatrices_MPIAIJ,
2621        MatIncreaseOverlap_MPIAIJ,
2622        MatGetValues_MPIAIJ,
2623        MatCopy_MPIAIJ,
2624 /*44*/ MatGetRowMax_MPIAIJ,
2625        MatScale_MPIAIJ,
2626        0,
2627        0,
2628        0,
2629 /*49*/ MatSetBlockSize_MPIAIJ,
2630        0,
2631        0,
2632        0,
2633        0,
2634 /*54*/ MatFDColoringCreate_MPIAIJ,
2635        0,
2636        MatSetUnfactored_MPIAIJ,
2637        MatPermute_MPIAIJ,
2638        0,
2639 /*59*/ MatGetSubMatrix_MPIAIJ,
2640        MatDestroy_MPIAIJ,
2641        MatView_MPIAIJ,
2642        0,
2643        0,
2644 /*64*/ 0,
2645        0,
2646        0,
2647        0,
2648        0,
2649 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2650        MatGetRowMinAbs_MPIAIJ,
2651        0,
2652        MatSetColoring_MPIAIJ,
2653 #if defined(PETSC_HAVE_ADIC)
2654        MatSetValuesAdic_MPIAIJ,
2655 #else
2656        0,
2657 #endif
2658        MatSetValuesAdifor_MPIAIJ,
2659 /*75*/ 0,
2660        0,
2661        0,
2662        0,
2663        0,
2664 /*80*/ 0,
2665        0,
2666        0,
2667 /*83*/ MatLoad_MPIAIJ,
2668        0,
2669        0,
2670        0,
2671        0,
2672        0,
2673 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2674        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2675        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2676        MatPtAP_Basic,
2677        MatPtAPSymbolic_MPIAIJ,
2678 /*94*/ MatPtAPNumeric_MPIAIJ,
2679        0,
2680        0,
2681        0,
2682        0,
2683 /*99*/ 0,
2684        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2685        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2686        MatConjugate_MPIAIJ,
2687        0,
2688 /*104*/MatSetValuesRow_MPIAIJ,
2689        MatRealPart_MPIAIJ,
2690        MatImaginaryPart_MPIAIJ,
2691        0,
2692        0,
2693 /*109*/0,
2694        MatGetRedundantMatrix_MPIAIJ,
2695        MatGetRowMin_MPIAIJ,
2696        0,
2697        0,
2698 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2699        0,
2700        0,
2701        0,
2702        0,
2703        0
2704 };
2705 
2706 /* ----------------------------------------------------------------------------------------*/
2707 
2708 EXTERN_C_BEGIN
2709 #undef __FUNCT__
2710 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2711 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2712 {
2713   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2714   PetscErrorCode ierr;
2715 
2716   PetscFunctionBegin;
2717   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2718   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2719   PetscFunctionReturn(0);
2720 }
2721 EXTERN_C_END
2722 
2723 EXTERN_C_BEGIN
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2726 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2727 {
2728   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2729   PetscErrorCode ierr;
2730 
2731   PetscFunctionBegin;
2732   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2733   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
2736 EXTERN_C_END
2737 
2738 #include "petscpc.h"
2739 EXTERN_C_BEGIN
2740 #undef __FUNCT__
2741 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
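/*
    Splits the preallocation request between the two sequential blocks: d_nz/d_nnz go to
    the diagonal block b->A and o_nz/o_nnz to the off-diagonal block b->B.  As a rough
    illustration (not taken from this file), a tridiagonal-type matrix might be
    preallocated with

        MatMPIAIJSetPreallocation(B,3,PETSC_NULL,2,PETSC_NULL);

    reserving 3 nonzeros per row in the diagonal block and 2 per row for columns owned
    by other processes.
*/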
2742 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2743 {
2744   Mat_MPIAIJ     *b;
2745   PetscErrorCode ierr;
2746   PetscInt       i;
2747 
2748   PetscFunctionBegin;
2749   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2750   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2751   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2752   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2753 
2754   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2755   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2756   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2757   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2758   if (d_nnz) {
2759     for (i=0; i<B->rmap->n; i++) {
2760       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2761     }
2762   }
2763   if (o_nnz) {
2764     for (i=0; i<B->rmap->n; i++) {
2765       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2766     }
2767   }
2768   b = (Mat_MPIAIJ*)B->data;
2769 
2770   if (!B->preallocated) {
2771     /* Explicitly create 2 MATSEQAIJ matrices. */
2772     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2773     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2774     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2775     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2776     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2777     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2778     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2779     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2780   }
2781 
2782   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2783   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2784   B->preallocated = PETSC_TRUE;
2785   PetscFunctionReturn(0);
2786 }
2787 EXTERN_C_END
2788 
2789 #undef __FUNCT__
2790 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2791 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2792 {
2793   Mat            mat;
2794   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2795   PetscErrorCode ierr;
2796 
2797   PetscFunctionBegin;
2798   *newmat       = 0;
2799   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2800   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2801   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2802   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2803   a    = (Mat_MPIAIJ*)mat->data;
2804 
2805   mat->factor       = matin->factor;
2806   mat->rmap->bs      = matin->rmap->bs;
2807   mat->assembled    = PETSC_TRUE;
2808   mat->insertmode   = NOT_SET_VALUES;
2809   mat->preallocated = PETSC_TRUE;
2810 
2811   a->size           = oldmat->size;
2812   a->rank           = oldmat->rank;
2813   a->donotstash     = oldmat->donotstash;
2814   a->roworiented    = oldmat->roworiented;
2815   a->rowindices     = 0;
2816   a->rowvalues      = 0;
2817   a->getrowactive   = PETSC_FALSE;
2818 
2819   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2820   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2821 
2822   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2823   if (oldmat->colmap) {
2824 #if defined (PETSC_USE_CTABLE)
2825     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2826 #else
2827     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2828     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2829     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2830 #endif
2831   } else a->colmap = 0;
2832   if (oldmat->garray) {
2833     PetscInt len;
2834     len  = oldmat->B->cmap->n;
2835     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2836     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2837     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2838   } else a->garray = 0;
2839 
2840   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2841   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2842   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2843   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2844   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2845   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2846   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2847   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2848   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2849   *newmat = mat;
2850   PetscFunctionReturn(0);
2851 }
2852 
2853 #include "petscsys.h"
2854 
2855 #undef __FUNCT__
2856 #define __FUNCT__ "MatLoad_MPIAIJ"
2857 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2858 {
2859   Mat            A;
2860   PetscScalar    *vals,*svals;
2861   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2862   MPI_Status     status;
2863   PetscErrorCode ierr;
2864   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2865   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2866   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2867   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2868   PetscInt       cend,cstart,n,*rowners;
2869   int            fd;
2870 
2871   PetscFunctionBegin;
2872   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2873   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2874   if (!rank) {
2875     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2876     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2877     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2878   }
2879 
2880   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2881   M = header[1]; N = header[2];
2882   /* determine ownership of all rows */
2883   m    = M/size + ((M % size) > rank);
2884   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2885   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2886 
2887   /* First process needs enough room for process with most rows */
2888   if (!rank) {
2889     mmax       = rowners[1];
2890     for (i=2; i<size; i++) {
2891       mmax = PetscMax(mmax,rowners[i]);
2892     }
2893   } else mmax = m;
2894 
2895   rowners[0] = 0;
2896   for (i=2; i<=size; i++) {
2897     rowners[i] += rowners[i-1];
2898   }
2899   rstart = rowners[rank];
2900   rend   = rowners[rank+1];
2901 
2902   /* distribute row lengths to all processors */
2903   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2904   if (!rank) {
2905     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2906     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2907     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2908     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2909     for (j=0; j<m; j++) {
2910       procsnz[0] += ourlens[j];
2911     }
2912     for (i=1; i<size; i++) {
2913       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2914       /* calculate the number of nonzeros on each processor */
2915       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2916         procsnz[i] += rowlengths[j];
2917       }
2918       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2919       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2920     }
2921     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2922   } else {
2923     mpicnt = PetscMPIIntCast(m);
2924     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2925   }
2926 
2927   if (!rank) {
2928     /* determine max buffer needed and allocate it */
2929     maxnz = 0;
2930     for (i=0; i<size; i++) {
2931       maxnz = PetscMax(maxnz,procsnz[i]);
2932     }
2933     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2934 
2935     /* read in my part of the matrix column indices  */
2936     nz   = procsnz[0];
2937     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2938     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2939 
2940     /* read in everyone else's and ship off */
2941     for (i=1; i<size; i++) {
2942       nz     = procsnz[i];
2943       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2944       mpicnt = PetscMPIIntCast(nz);
2945       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2946     }
2947     ierr = PetscFree(cols);CHKERRQ(ierr);
2948   } else {
2949     /* determine buffer space needed for message */
2950     nz = 0;
2951     for (i=0; i<m; i++) {
2952       nz += ourlens[i];
2953     }
2954     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2955 
2956     /* receive message of column indices */
2957     mpicnt = PetscMPIIntCast(nz);
2958     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2959     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2960     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2961     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2962     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2963   }
2964 
2965   /* determine column ownership if matrix is not square */
2966   if (N != M) {
2967     n      = N/size + ((N % size) > rank);
2968     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2969     cstart = cend - n;
2970   } else {
2971     cstart = rstart;
2972     cend   = rend;
2973     n      = cend - cstart;
2974   }
2975 
2976   /* loop over local rows, determining number of off diagonal entries */
2977   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2978   jj = 0;
2979   for (i=0; i<m; i++) {
2980     for (j=0; j<ourlens[i]; j++) {
2981       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2982       jj++;
2983     }
2984   }
2985 
2986   /* create our matrix */
2987   for (i=0; i<m; i++) {
2988     ourlens[i] -= offlens[i];
2989   }
2990   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2991   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2992   ierr = MatSetType(A,type);CHKERRQ(ierr);
2993   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2994 
2995   for (i=0; i<m; i++) {
2996     ourlens[i] += offlens[i];
2997   }
2998 
2999   if (!rank) {
3000     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3001 
3002     /* read in my part of the matrix numerical values  */
3003     nz   = procsnz[0];
3004     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3005 
3006     /* insert into matrix */
3007     jj      = rstart;
3008     smycols = mycols;
3009     svals   = vals;
3010     for (i=0; i<m; i++) {
3011       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3012       smycols += ourlens[i];
3013       svals   += ourlens[i];
3014       jj++;
3015     }
3016 
3017     /* read in other processors and ship out */
3018     for (i=1; i<size; i++) {
3019       nz     = procsnz[i];
3020       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3021       mpicnt = PetscMPIIntCast(nz);
3022       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3023     }
3024     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3025   } else {
3026     /* receive numeric values */
3027     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3028 
3029     /* receive message of values*/
3030     mpicnt = PetscMPIIntCast(nz);
3031     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3032     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3033     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3034     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3035     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3036 
3037     /* insert into matrix */
3038     jj      = rstart;
3039     smycols = mycols;
3040     svals   = vals;
3041     for (i=0; i<m; i++) {
3042       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3043       smycols += ourlens[i];
3044       svals   += ourlens[i];
3045       jj++;
3046     }
3047   }
3048   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3049   ierr = PetscFree(vals);CHKERRQ(ierr);
3050   ierr = PetscFree(mycols);CHKERRQ(ierr);
3051   ierr = PetscFree(rowners);CHKERRQ(ierr);
3052 
3053   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3054   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3055   *newmat = A;
3056   PetscFunctionReturn(0);
3057 }
3058 
3059 #undef __FUNCT__
3060 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3061 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3062 {
3063   PetscErrorCode ierr;
3064   IS             iscol_local;
3065   PetscInt       csize;
3066 
3067   PetscFunctionBegin;
3068   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3069   if (call == MAT_REUSE_MATRIX) {
3070     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3071     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3072   } else {
3073     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3074   }
3075   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3076   if (call == MAT_INITIAL_MATRIX) {
3077     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3078     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3079   }
3080   PetscFunctionReturn(0);
3081 }
3082 
3083 #undef __FUNCT__
3084 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3085 /*
3086     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3087   process, and then the final result by concatenating the local matrices.
3088   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3089 
3090   Note: This requires a sequential iscol containing all of the column indices.
3091 */
3092 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3093 {
3094   PetscErrorCode ierr;
3095   PetscMPIInt    rank,size;
3096   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3097   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3098   Mat            *local,M,Mreuse;
3099   MatScalar      *vwork,*aa;
3100   MPI_Comm       comm = ((PetscObject)mat)->comm;
3101   Mat_SeqAIJ     *aij;
3102 
3103 
3104   PetscFunctionBegin;
3105   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3106   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3107 
3108   if (call ==  MAT_REUSE_MATRIX) {
3109     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3110     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3111     local = &Mreuse;
3112     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3113   } else {
3114     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3115     Mreuse = *local;
3116     ierr   = PetscFree(local);CHKERRQ(ierr);
3117   }
3118 
3119   /*
3120       m - number of local rows
3121       n - number of columns (same on all processors)
3122       rstart - first row in new global matrix generated
3123   */
3124   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3125   if (call == MAT_INITIAL_MATRIX) {
3126     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3127     ii  = aij->i;
3128     jj  = aij->j;
3129 
3130     /*
3131         Determine the number of non-zeros in the diagonal and off-diagonal
3132         portions of the matrix in order to do correct preallocation
3133     */
3134 
3135     /* first get start and end of "diagonal" columns */
3136     if (csize == PETSC_DECIDE) {
3137       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3138       if (mglobal == n) { /* square matrix */
3139 	nlocal = m;
3140       } else {
3141         nlocal = n/size + ((n % size) > rank);
3142       }
3143     } else {
3144       nlocal = csize;
3145     }
3146     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3147     rstart = rend - nlocal;
3148     if (rank == size - 1 && rend != n) {
3149       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3150     }
3151 
3152     /* next, compute all the lengths */
3153     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3154     olens = dlens + m;
3155     for (i=0; i<m; i++) {
3156       jend = ii[i+1] - ii[i];
3157       olen = 0;
3158       dlen = 0;
3159       for (j=0; j<jend; j++) {
3160         if (*jj < rstart || *jj >= rend) olen++;
3161         else dlen++;
3162         jj++;
3163       }
3164       olens[i] = olen;
3165       dlens[i] = dlen;
3166     }
3167     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3168     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3169     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3170     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3171     ierr = PetscFree(dlens);CHKERRQ(ierr);
3172   } else {
3173     PetscInt ml,nl;
3174 
3175     M = *newmat;
3176     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3177     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3178     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3179     /*
3180          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3181        rather than the slower MatSetValues().
3182     */
3183     M->was_assembled = PETSC_TRUE;
3184     M->assembled     = PETSC_FALSE;
3185   }
3186   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3187   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3188   ii  = aij->i;
3189   jj  = aij->j;
3190   aa  = aij->a;
3191   for (i=0; i<m; i++) {
3192     row   = rstart + i;
3193     nz    = ii[i+1] - ii[i];
3194     cwork = jj;     jj += nz;
3195     vwork = aa;     aa += nz;
3196     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3197   }
3198 
3199   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3200   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3201   *newmat = M;
3202 
3203   /* save submatrix used in processor for next request */
3204   if (call ==  MAT_INITIAL_MATRIX) {
3205     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3206     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3207   }
3208 
3209   PetscFunctionReturn(0);
3210 }
3211 
3212 EXTERN_C_BEGIN
3213 #undef __FUNCT__
3214 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3215 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3216 {
3217   PetscInt       m,cstart, cend,j,nnz,i,d;
3218   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3219   const PetscInt *JJ;
3220   PetscScalar    *values;
3221   PetscErrorCode ierr;
3222 
3223   PetscFunctionBegin;
3224   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3225 
3226   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3227   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3228   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3229   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3230   m      = B->rmap->n;
3231   cstart = B->cmap->rstart;
3232   cend   = B->cmap->rend;
3233   rstart = B->rmap->rstart;
3234 
3235   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3236   o_nnz = d_nnz + m;
3237 
3238 #if defined(PETSC_USE_DEBUG)
3239   for (i=0; i<m; i++) {
3240     nnz     = Ii[i+1]- Ii[i];
3241     JJ      = J + Ii[i];
3242     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3243     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3244     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3245     for (j=1; j<nnz; j++) {
3246       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3247     }
3248   }
3249 #endif
3250 
3251   for (i=0; i<m; i++) {
3252     nnz     = Ii[i+1]- Ii[i];
3253     JJ      = J + Ii[i];
3254     nnz_max = PetscMax(nnz_max,nnz);
3255     for (j=0; j<nnz; j++) {
3256       if (*JJ >= cstart) break;
3257       JJ++;
3258     }
3259     d = 0;
3260     for (; j<nnz; j++) {
3261       if (*JJ++ >= cend) break;
3262       d++;
3263     }
3264     d_nnz[i] = d;
3265     o_nnz[i] = nnz - d;
3266   }
3267   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3268   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3269 
3270   if (v) values = (PetscScalar*)v;
3271   else {
3272     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3273     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3274   }
3275 
3276   for (i=0; i<m; i++) {
3277     ii   = i + rstart;
3278     nnz  = Ii[i+1]- Ii[i];
3279     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3280   }
3281   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3282   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3283 
3284   if (!v) {
3285     ierr = PetscFree(values);CHKERRQ(ierr);
3286   }
3287   PetscFunctionReturn(0);
3288 }
3289 EXTERN_C_END
3290 
3291 #undef __FUNCT__
3292 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3293 /*@
3294    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3295    (the default parallel PETSc format).
3296 
3297    Collective on MPI_Comm
3298 
3299    Input Parameters:
3300 +  B - the matrix
3301 .  i - the indices into j for the start of each local row (starts with zero)
3302 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3303 -  v - optional values in the matrix
3304 
3305    Level: developer
3306 
3307    Notes:
3308        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3309      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3310      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3311 
3312        The i and j indices are 0 based; the i indices are offsets into the local j array.
3313 
3314        The format which is used for the sparse matrix input is equivalent to a
3315     row-major ordering, i.e. for the following matrix, the input data expected is
3316     as shown:
3317 
3318         1 0 0
3319         2 0 3     P0
3320        -------
3321         4 5 6     P1
3322 
3323      Process0 [P0]: rows_owned=[0,1]
3324         i =  {0,1,3}  [size = nrow+1  = 2+1]
3325         j =  {0,0,2}  [size = nz = 3]
3326         v =  {1,2,3}  [size = nz = 3]
3327 
3328      Process1 [P1]: rows_owned=[2]
3329         i =  {0,3}    [size = nrow+1  = 1+1]
3330         j =  {0,1,2}  [size = nz = 3]
3331         v =  {4,5,6}  [size = nz = 3]
3332 
3333       The column indices for each row MUST be sorted.
3334 
3335 .keywords: matrix, aij, compressed row, sparse, parallel
3336 
3337 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3338           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3339 @*/
3340 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3341 {
3342   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3343 
3344   PetscFunctionBegin;
3345   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3346   if (f) {
3347     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3348   }
3349   PetscFunctionReturn(0);
3350 }
3351 
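/*
   A minimal sketch (editorial illustration, not part of the library) of the call
   sequence for MatMPIAIJSetPreallocationCSR() using the process-0 data from the
   3x3 example in the manual page above; process 1 would make the same calls with
   its own i, j, v and 1 local row. The variable names are assumptions.

       Mat         B;
       PetscInt    i[] = {0,1,3},j[] = {0,0,2};
       PetscScalar v[] = {1.0,2.0,3.0};

       ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
       ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
       ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/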
3352 #undef __FUNCT__
3353 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3354 /*@C
3355    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3356    (the default parallel PETSc format).  For good matrix assembly performance
3357    the user should preallocate the matrix storage by setting the parameters
3358    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3359    performance can be increased by more than a factor of 50.
3360 
3361    Collective on MPI_Comm
3362 
3363    Input Parameters:
3364 +  A - the matrix
3365 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3366            (same value is used for all local rows)
3367 .  d_nnz - array containing the number of nonzeros in the various rows of the
3368            DIAGONAL portion of the local submatrix (possibly different for each row)
3369            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3370            The size of this array is equal to the number of local rows, i.e 'm'.
3371            You must leave room for the diagonal entry even if it is zero.
3372 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3373            submatrix (same value is used for all local rows).
3374 -  o_nnz - array containing the number of nonzeros in the various rows of the
3375            OFF-DIAGONAL portion of the local submatrix (possibly different for
3376            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3377            structure. The size of this array is equal to the number
3378            of local rows, i.e 'm'.
3379 
3380    If the *_nnz parameter is given then the *_nz parameter is ignored
3381 
3382    The AIJ format (also called the Yale sparse matrix format or
3383    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3384    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3385 
3386    The parallel matrix is partitioned such that the first m0 rows belong to
3387    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3388    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3389 
3390    The DIAGONAL portion of the local submatrix of a processor can be defined
3391    as the submatrix which is obtained by extracting the part corresponding
3392    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3393    first row that belongs to the processor, and r2 is the last row belonging
3394    to this processor. This is a square mxm matrix. The remaining portion
3395    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3396 
3397    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3398 
3399    You can call MatGetInfo() to get information on how effective the preallocation was;
3400    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3401    You can also run with the option -info and look for messages with the string
3402    malloc in them to see if additional memory allocation was needed.
3403 
3404    Example usage:
3405 
3406    Consider the following 8x8 matrix with 34 non-zero values, that is
3407    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3408    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3409    as follows:
3410 
3411 .vb
3412             1  2  0  |  0  3  0  |  0  4
3413     Proc0   0  5  6  |  7  0  0  |  8  0
3414             9  0 10  | 11  0  0  | 12  0
3415     -------------------------------------
3416            13  0 14  | 15 16 17  |  0  0
3417     Proc1   0 18  0  | 19 20 21  |  0  0
3418             0  0  0  | 22 23  0  | 24  0
3419     -------------------------------------
3420     Proc2  25 26 27  |  0  0 28  | 29  0
3421            30  0  0  | 31 32 33  |  0 34
3422 .ve
3423 
3424    This can be represented as a collection of submatrices as:
3425 
3426 .vb
3427       A B C
3428       D E F
3429       G H I
3430 .ve
3431 
3432    Where the submatrices A,B,C are owned by proc0, D,E,F are
3433    owned by proc1, G,H,I are owned by proc2.
3434 
3435    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3436    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3437    The 'M','N' parameters are 8,8, and have the same values on all procs.
3438 
3439    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3440    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3441    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3442    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3443    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3444    matrix, and [DF] as another SeqAIJ matrix.
3445 
3446    When d_nz, o_nz parameters are specified, d_nz storage elements are
3447    allocated for every row of the local diagonal submatrix, and o_nz
3448    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3449    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3450    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3451    In this case, the values of d_nz,o_nz are:
3452 .vb
3453      proc0 : dnz = 2, o_nz = 2
3454      proc1 : dnz = 3, o_nz = 2
3455      proc2 : dnz = 1, o_nz = 4
3456 .ve
3457    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3458    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3459    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3460    34 values.
3461 
3462    When d_nnz, o_nnz parameters are specified, the storage is specified
3463    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3464    In the above case the values for d_nnz,o_nnz are:
3465 .vb
3466      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3467      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3468      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3469 .ve
3470    Here the space allocated is the sum of all the above values, i.e. 34, and
3471    hence pre-allocation is perfect.
3472 
3473    Level: intermediate
3474 
3475 .keywords: matrix, aij, compressed row, sparse, parallel
3476 
3477 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3478           MPIAIJ, MatGetInfo()
3479 @*/
3480 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3481 {
3482   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3483 
3484   PetscFunctionBegin;
3485   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3486   if (f) {
3487     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3488   }
3489   PetscFunctionReturn(0);
3490 }
3491 
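/*
   A minimal sketch (editorial illustration, not part of the library) of how process 0
   in the 8x8 example above could preallocate with the per-row counts listed there.
   The variable names and the MatCreate()/MatSetSizes()/MatSetType() boilerplate around
   the call are assumptions; the other processes pass their own local sizes and arrays.

       Mat      A;
       PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/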
3492 #undef __FUNCT__
3493 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3494 /*@
3495      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3496          CSR format the local rows.
3497 
3498    Collective on MPI_Comm
3499 
3500    Input Parameters:
3501 +  comm - MPI communicator
3502 .  m - number of local rows (Cannot be PETSC_DECIDE)
3503 .  n - This value should be the same as the local size used in creating the
3504        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3505        calculated if N is given) For square matrices n is almost always m.
3506 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3507 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3508 .   i - row indices
3509 .   j - column indices
3510 -   a - matrix values
3511 
3512    Output Parameter:
3513 .   mat - the matrix
3514 
3515    Level: intermediate
3516 
3517    Notes:
3518        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3519      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3520      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3521 
3522        The i and j indices are 0 based; the i indices are offsets into the local j array.
3523 
3524        The format which is used for the sparse matrix input is equivalent to a
3525     row-major ordering, i.e. for the following matrix, the input data expected is
3526     as shown:
3527 
3528         1 0 0
3529         2 0 3     P0
3530        -------
3531         4 5 6     P1
3532 
3533      Process0 [P0]: rows_owned=[0,1]
3534         i =  {0,1,3}  [size = nrow+1  = 2+1]
3535         j =  {0,0,2}  [size = nz = 3]
3536         v =  {1,2,3}  [size = nz = 3]
3537 
3538      Process1 [P1]: rows_owned=[2]
3539         i =  {0,3}    [size = nrow+1  = 1+1]
3540         j =  {0,1,2}  [size = nz = 3]
3541         v =  {4,5,6}  [size = nz = 3]
3542 
3543 .keywords: matrix, aij, compressed row, sparse, parallel
3544 
3545 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3546           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3547 @*/
3548 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3549 {
3550   PetscErrorCode ierr;
3551 
3552   PetscFunctionBegin;
3553   if (i[0]) {
3554     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3555   }
3556   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3557   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3558   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3559   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3560   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3561   PetscFunctionReturn(0);
3562 }
3563 
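/*
   A minimal sketch (editorial illustration, not part of the library): the process-0
   data from the 3x3 example in the manual page above passed through this one-call
   interface. The variable names are assumptions; process 1 would pass 1 local row
   and its own arrays.

       Mat         C;
       PetscInt    i[] = {0,1,3},j[] = {0,0,2};
       PetscScalar a[] = {1.0,2.0,3.0};

       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&C);CHKERRQ(ierr);
*/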
3564 #undef __FUNCT__
3565 #define __FUNCT__ "MatCreateMPIAIJ"
3566 /*@C
3567    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3568    (the default parallel PETSc format).  For good matrix assembly performance
3569    the user should preallocate the matrix storage by setting the parameters
3570    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3571    performance can be increased by more than a factor of 50.
3572 
3573    Collective on MPI_Comm
3574 
3575    Input Parameters:
3576 +  comm - MPI communicator
3577 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3578            This value should be the same as the local size used in creating the
3579            y vector for the matrix-vector product y = Ax.
3580 .  n - This value should be the same as the local size used in creating the
3581        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3582        calculated if N is given) For square matrices n is almost always m.
3583 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3584 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3585 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3586            (same value is used for all local rows)
3587 .  d_nnz - array containing the number of nonzeros in the various rows of the
3588            DIAGONAL portion of the local submatrix (possibly different for each row)
3589            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3590            The size of this array is equal to the number of local rows, i.e 'm'.
3591            You must leave room for the diagonal entry even if it is zero.
3592 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3593            submatrix (same value is used for all local rows).
3594 -  o_nnz - array containing the number of nonzeros in the various rows of the
3595            OFF-DIAGONAL portion of the local submatrix (possibly different for
3596            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3597            structure. The size of this array is equal to the number
3598            of local rows, i.e 'm'.
3599 
3600    Output Parameter:
3601 .  A - the matrix
3602 
3603    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3604    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3605    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3606 
3607    Notes:
3608    If the *_nnz parameter is given then the *_nz parameter is ignored
3609 
3610    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3611    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3612    storage requirements for this matrix.
3613 
3614    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3615    processor then it must be used on all processors that share the object for
3616    that argument.
3617 
3618    The user MUST specify either the local or global matrix dimensions
3619    (possibly both).
3620 
3621    The parallel matrix is partitioned across processors such that the
3622    first m0 rows belong to process 0, the next m1 rows belong to
3623    process 1, the next m2 rows belong to process 2, etc., where
3624    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3625    values corresponding to an [m x N] submatrix.
3626 
3627    The columns are logically partitioned with the n0 columns belonging
3628    to 0th partition, the next n1 columns belonging to the next
3629    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3630 
3631    The DIAGONAL portion of the local submatrix on any given processor
3632    is the submatrix corresponding to the rows and columns m,n
3633    corresponding to the given processor, i.e. the diagonal matrix on
3634    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3635    etc. The remaining portion of the local submatrix [m x (N-n)]
3636    constitutes the OFF-DIAGONAL portion. The example below better
3637    illustrates this concept.
3638 
3639    For a square global matrix we define each processor's diagonal portion
3640    to be its local rows and the corresponding columns (a square submatrix);
3641    each processor's off-diagonal portion encompasses the remainder of the
3642    local matrix (a rectangular submatrix).
3643 
3644    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3645 
3646    When calling this routine with a single process communicator, a matrix of
3647    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3648    type of communicator, use the construction mechanism:
3649      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3650 
3651    By default, this format uses inodes (identical nodes) when possible.
3652    We search for consecutive rows with the same nonzero structure, thereby
3653    reusing matrix information to achieve increased efficiency.
3654 
3655    Options Database Keys:
3656 +  -mat_no_inode  - Do not use inodes
3657 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3658 -  -mat_aij_oneindex - Internally use indexing starting at 1
3659         rather than 0.  Note that when calling MatSetValues(),
3660         the user still MUST index entries starting at 0!
3661 
3662 
3663    Example usage:
3664 
3665    Consider the following 8x8 matrix with 34 non-zero values, that is
3666    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3667    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3668    as follows:
3669 
3670 .vb
3671             1  2  0  |  0  3  0  |  0  4
3672     Proc0   0  5  6  |  7  0  0  |  8  0
3673             9  0 10  | 11  0  0  | 12  0
3674     -------------------------------------
3675            13  0 14  | 15 16 17  |  0  0
3676     Proc1   0 18  0  | 19 20 21  |  0  0
3677             0  0  0  | 22 23  0  | 24  0
3678     -------------------------------------
3679     Proc2  25 26 27  |  0  0 28  | 29  0
3680            30  0  0  | 31 32 33  |  0 34
3681 .ve
3682 
3683    This can be represented as a collection of submatrices as:
3684 
3685 .vb
3686       A B C
3687       D E F
3688       G H I
3689 .ve
3690 
3691    Where the submatrices A,B,C are owned by proc0, D,E,F are
3692    owned by proc1, G,H,I are owned by proc2.
3693 
3694    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3695    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3696    The 'M','N' parameters are 8,8, and have the same values on all procs.
3697 
3698    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3699    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3700    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3701    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3702    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3703    matrix, and [DF] as another SeqAIJ matrix.
3704 
3705    When d_nz, o_nz parameters are specified, d_nz storage elements are
3706    allocated for every row of the local diagonal submatrix, and o_nz
3707    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3708    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3709    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3710    In this case, the values of d_nz,o_nz are:
3711 .vb
3712      proc0 : dnz = 2, o_nz = 2
3713      proc1 : dnz = 3, o_nz = 2
3714      proc2 : dnz = 1, o_nz = 4
3715 .ve
3716    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3717    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3718    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3719    34 values.
3720 
3721    When d_nnz, o_nnz parameters are specified, the storage is specified
3722    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3723    In the above case the values for d_nnz,o_nnz are:
3724 .vb
3725      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3726      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3727      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3728 .ve
3729    Here the space allocated is the sum of all the above values, i.e. 34, and
3730    hence pre-allocation is perfect.
3731 
3732    Level: intermediate
3733 
3734 .keywords: matrix, aij, compressed row, sparse, parallel
3735 
3736 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3737           MPIAIJ, MatCreateMPIAIJWithArrays()
3738 @*/
3739 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3740 {
3741   PetscErrorCode ierr;
3742   PetscMPIInt    size;
3743 
3744   PetscFunctionBegin;
3745   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3746   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3747   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3748   if (size > 1) {
3749     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3750     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3751   } else {
3752     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3753     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3754   }
3755   PetscFunctionReturn(0);
3756 }
3757 
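/*
   A minimal sketch (editorial illustration, not part of the library): the single call
   proc1 of the 8x8 example above could make, using the d_nz/o_nz values listed there
   (3 and 2); the other processes make the same call with their own local sizes and
   counts. The variable name is an assumption.

       Mat A;

       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/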
3758 #undef __FUNCT__
3759 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3760 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3761 {
3762   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3763 
3764   PetscFunctionBegin;
3765   *Ad     = a->A;
3766   *Ao     = a->B;
3767   *colmap = a->garray;
3768   PetscFunctionReturn(0);
3769 }
3770 
3771 #undef __FUNCT__
3772 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3773 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3774 {
3775   PetscErrorCode ierr;
3776   PetscInt       i;
3777   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3778 
3779   PetscFunctionBegin;
3780   if (coloring->ctype == IS_COLORING_GLOBAL) {
3781     ISColoringValue *allcolors,*colors;
3782     ISColoring      ocoloring;
3783 
3784     /* set coloring for diagonal portion */
3785     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3786 
3787     /* set coloring for off-diagonal portion */
3788     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3789     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3790     for (i=0; i<a->B->cmap->n; i++) {
3791       colors[i] = allcolors[a->garray[i]];
3792     }
3793     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3794     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3795     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3796     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3797   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3798     ISColoringValue *colors;
3799     PetscInt        *larray;
3800     ISColoring      ocoloring;
3801 
3802     /* set coloring for diagonal portion */
3803     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3804     for (i=0; i<a->A->cmap->n; i++) {
3805       larray[i] = i + A->cmap->rstart;
3806     }
3807     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3808     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3809     for (i=0; i<a->A->cmap->n; i++) {
3810       colors[i] = coloring->colors[larray[i]];
3811     }
3812     ierr = PetscFree(larray);CHKERRQ(ierr);
3813     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3814     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3815     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3816 
3817     /* set coloring for off-diagonal portion */
3818     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3819     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3820     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3821     for (i=0; i<a->B->cmap->n; i++) {
3822       colors[i] = coloring->colors[larray[i]];
3823     }
3824     ierr = PetscFree(larray);CHKERRQ(ierr);
3825     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3826     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3827     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3828   } else {
3829     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3830   }
3831 
3832   PetscFunctionReturn(0);
3833 }
3834 
3835 #if defined(PETSC_HAVE_ADIC)
3836 #undef __FUNCT__
3837 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3838 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3839 {
3840   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3841   PetscErrorCode ierr;
3842 
3843   PetscFunctionBegin;
3844   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3845   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3846   PetscFunctionReturn(0);
3847 }
3848 #endif
3849 
3850 #undef __FUNCT__
3851 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3852 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3853 {
3854   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3855   PetscErrorCode ierr;
3856 
3857   PetscFunctionBegin;
3858   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3859   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3860   PetscFunctionReturn(0);
3861 }
3862 
3863 #undef __FUNCT__
3864 #define __FUNCT__ "MatMerge"
3865 /*@
3866       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3867                  matrices from each processor
3868 
3869     Collective on MPI_Comm
3870 
3871    Input Parameters:
3872 +    comm - the communicator the parallel matrix will live on
3873 .    inmat - the input sequential matrix (one per process)
3874 .    n - number of local columns (or PETSC_DECIDE)
3875 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3876 
3877    Output Parameter:
3878 .    outmat - the parallel matrix generated
3879 
3880     Level: advanced
3881 
3882    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3883 
3884 @*/
3885 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3886 {
3887   PetscErrorCode ierr;
3888   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3889   PetscInt       *indx;
3890   PetscScalar    *values;
3891 
3892   PetscFunctionBegin;
3893   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3894   if (scall == MAT_INITIAL_MATRIX){
3895     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3896     if (n == PETSC_DECIDE){
3897       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3898     }
3899     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3900     rstart -= m;
3901 
3902     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3903     for (i=0;i<m;i++) {
3904       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3905       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3906       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3907     }
3908     /* This routine will ONLY return MPIAIJ type matrix */
3909     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3910     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3911     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3912     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3913     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3914 
3915   } else if (scall == MAT_REUSE_MATRIX){
3916     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3917   } else {
3918     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3919   }
3920 
3921   for (i=0;i<m;i++) {
3922     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3923     Ii    = i + rstart;
3924     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3925     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3926   }
3927   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3928   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3929   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3930 
3931   PetscFunctionReturn(0);
3932 }
3933 
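/*
   A minimal sketch (editorial illustration, not part of the library) of the intended
   usage: each process assembles a SeqAIJ strip with the full column count N and hands
   it to MatMerge(); note from the code above that MatMerge() destroys the input matrix.
   mlocal, N and the value-setting step are assumptions standing in for user code.

       Mat local,global;

       ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,PETSC_NULL,&local);CHKERRQ(ierr);
       ...  fill local with MatSetValues() and assemble it  ...
       ierr = MatMerge(PETSC_COMM_WORLD,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&global);CHKERRQ(ierr);
*/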
3934 #undef __FUNCT__
3935 #define __FUNCT__ "MatFileSplit"
3936 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3937 {
3938   PetscErrorCode    ierr;
3939   PetscMPIInt       rank;
3940   PetscInt          m,N,i,rstart,nnz;
3941   size_t            len;
3942   const PetscInt    *indx;
3943   PetscViewer       out;
3944   char              *name;
3945   Mat               B;
3946   const PetscScalar *values;
3947 
3948   PetscFunctionBegin;
3949   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3950   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3951   /* Should this be the type of the diagonal block of A? */
3952   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3953   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3954   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3955   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3956   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3957   for (i=0;i<m;i++) {
3958     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3959     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3960     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3961   }
3962   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3963   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3964 
3965   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3966   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3967   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3968   sprintf(name,"%s.%d",outfile,rank);
3969   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3970   ierr = PetscFree(name);CHKERRQ(ierr);
3971   ierr = MatView(B,out);CHKERRQ(ierr);
3972   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3973   ierr = MatDestroy(B);CHKERRQ(ierr);
3974   PetscFunctionReturn(0);
3975 }
3976 
3977 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3978 #undef __FUNCT__
3979 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3980 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3981 {
3982   PetscErrorCode       ierr;
3983   Mat_Merge_SeqsToMPI  *merge;
3984   PetscContainer       container;
3985 
3986   PetscFunctionBegin;
3987   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3988   if (container) {
3989     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3990     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3991     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
4001 
4002     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4003     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4004     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is valid only when the container existed */
4005   }
4006 
4007   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4008   PetscFunctionReturn(0);
4009 }
4010 
4011 #include "../src/mat/utils/freespace.h"
4012 #include "petscbt.h"
4013 
4014 #undef __FUNCT__
4015 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4016 /*@C
4017       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4018                  matrices from each processor
4019 
4020     Collective on MPI_Comm
4021 
4022    Input Parameters:
4023 +    comm - the communicator the parallel matrix will live on
4024 .    seqmat - the input sequential matrices
4025 .    m - number of local rows (or PETSC_DECIDE)
4026 .    n - number of local columns (or PETSC_DECIDE)
4027 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4028 
4029    Output Parameter:
4030 .    mpimat - the parallel matrix generated
4031 
4032     Level: advanced
4033 
4034    Notes:
4035      The dimensions of the sequential matrix in each processor MUST be the same.
4036      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4037      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4038 @*/
4039 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4040 {
4041   PetscErrorCode       ierr;
4042   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4043   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4044   PetscMPIInt          size,rank,taga,*len_s;
4045   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4046   PetscInt             proc,m;
4047   PetscInt             **buf_ri,**buf_rj;
4048   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4049   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4050   MPI_Request          *s_waits,*r_waits;
4051   MPI_Status           *status;
4052   MatScalar            *aa=a->a;
4053   MatScalar            **abuf_r,*ba_i;
4054   Mat_Merge_SeqsToMPI  *merge;
4055   PetscContainer       container;
4056 
4057   PetscFunctionBegin;
4058   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4059 
4060   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4061   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4062 
4063   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4064   if (container) {
4065     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4066   }
4067   bi     = merge->bi;
4068   bj     = merge->bj;
4069   buf_ri = merge->buf_ri;
4070   buf_rj = merge->buf_rj;
4071 
4072   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4073   owners = merge->rowmap.range;
4074   len_s  = merge->len_s;
4075 
4076   /* send and recv matrix values */
4077   /*-----------------------------*/
4078   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4079   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4080 
4081   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4082   for (proc=0,k=0; proc<size; proc++){
4083     if (!len_s[proc]) continue;
4084     i = owners[proc];
4085     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4086     k++;
4087   }
4088 
4089   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4090   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4091   ierr = PetscFree(status);CHKERRQ(ierr);
4092 
4093   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4094   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4095 
4096   /* insert mat values of mpimat */
4097   /*----------------------------*/
4098   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4099   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4100   nextrow = buf_ri_k + merge->nrecv;
4101   nextai  = nextrow + merge->nrecv;
4102 
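  /* Layout of each received i-structure buf_ri[k], mirroring the buf_si[] packing
     done in MatMerge_SeqsToMPISymbolic():
         buf_ri[k][0]                  : number of nonempty rows sent by that process
         buf_ri[k][1..nrows]           : local row indices of those rows
         buf_ri[k][nrows+1..2*nrows+1] : running sums of the row lengths (i-structure) */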
4103   for (k=0; k<merge->nrecv; k++){
4104     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4105     nrows = *(buf_ri_k[k]);
4106     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4107     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4108   }
4109 
4110   /* set values of ba */
4111   m = merge->rowmap.n;
4112   for (i=0; i<m; i++) {
4113     arow = owners[rank] + i;
4114     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4115     bnzi = bi[i+1] - bi[i];
4116     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4117 
4118     /* add local non-zero vals of this proc's seqmat into ba */
4119     anzi = ai[arow+1] - ai[arow];
4120     aj   = a->j + ai[arow];
4121     aa   = a->a + ai[arow];
4122     nextaj = 0;
4123     for (j=0; nextaj<anzi; j++){
4124       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4125         ba_i[j] += aa[nextaj++];
4126       }
4127     }
4128 
4129     /* add received vals into ba */
4130     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4131       /* i-th row */
4132       if (i == *nextrow[k]) {
4133         anzi = *(nextai[k]+1) - *nextai[k];
4134         aj   = buf_rj[k] + *(nextai[k]);
4135         aa   = abuf_r[k] + *(nextai[k]);
4136         nextaj = 0;
4137         for (j=0; nextaj<anzi; j++){
4138           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4139             ba_i[j] += aa[nextaj++];
4140           }
4141         }
4142         nextrow[k]++; nextai[k]++;
4143       }
4144     }
4145     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4146   }
4147   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4148   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4149 
4150   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4151   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4152   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4153   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4154   PetscFunctionReturn(0);
4155 }
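/* Note: the numeric phase above relies on the "MatMergeSeqsToMPI" container that
   MatMerge_SeqsToMPISymbolic() attaches to the parallel matrix; it is normally
   reached through the MatMerge_SeqsToMPI() driver further below. */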
4156 
4157 #undef __FUNCT__
4158 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4159 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4160 {
4161   PetscErrorCode       ierr;
4162   Mat                  B_mpi;
4163   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4164   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4165   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4166   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4167   PetscInt             len,proc,*dnz,*onz;
4168   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4169   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4170   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4171   MPI_Status           *status;
4172   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4173   PetscBT              lnkbt;
4174   Mat_Merge_SeqsToMPI  *merge;
4175   PetscContainer       container;
4176 
4177   PetscFunctionBegin;
4178   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4179 
4180   /* make sure it is a PETSc comm */
4181   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4182   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4183   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4184 
4185   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4186   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4187 
4188   /* determine row ownership */
4189   /*---------------------------------------------------------*/
4190   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4191   merge->rowmap.n = m;
4192   merge->rowmap.N = M;
4193   merge->rowmap.bs = 1;
4194   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4195   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4196   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4197 
4198   m      = merge->rowmap.n;
4199   M      = merge->rowmap.N;
4200   owners = merge->rowmap.range;
4201 
4202   /* determine the number of messages to send, their lengths */
4203   /*---------------------------------------------------------*/
4204   len_s  = merge->len_s;
4205 
4206   len = 0;  /* length of buf_si[] */
4207   merge->nsend = 0;
4208   for (proc=0; proc<size; proc++){
4209     len_si[proc] = 0;
4210     if (proc == rank){
4211       len_s[proc] = 0;
4212     } else {
4213       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4214       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4215     }
4216     if (len_s[proc]) {
4217       merge->nsend++;
4218       nrows = 0;
4219       for (i=owners[proc]; i<owners[proc+1]; i++){
4220         if (ai[i+1] > ai[i]) nrows++;
4221       }
4222       len_si[proc] = 2*(nrows+1);
4223       len += len_si[proc];
4224     }
4225   }
4226 
4227   /* determine the number and length of messages to receive for ij-structure */
4228   /*-------------------------------------------------------------------------*/
4229   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4230   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4231 
4232   /* post the Irecv of j-structure */
4233   /*-------------------------------*/
4234   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4235   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4236 
4237   /* post the Isend of j-structure */
4238   /*--------------------------------*/
4239   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4240   sj_waits = si_waits + merge->nsend;
4241 
4242   for (proc=0, k=0; proc<size; proc++){
4243     if (!len_s[proc]) continue;
4244     i = owners[proc];
4245     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4246     k++;
4247   }
4248 
4249   /* receives and sends of j-structure are complete */
4250   /*------------------------------------------------*/
4251   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4252   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4253 
4254   /* send and recv i-structure */
4255   /*---------------------------*/
4256   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4257   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4258 
4259   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4260   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4261   for (proc=0,k=0; proc<size; proc++){
4262     if (!len_s[proc]) continue;
4263     /* form outgoing message for i-structure:
4264          buf_si[0]:                 nrows to be sent
4265                [1:nrows]:           row index (local to the destination process)
4266                [nrows+1:2*nrows+1]: i-structure index
4267     */
4268     /*-------------------------------------------*/
4269     nrows = len_si[proc]/2 - 1;
4270     buf_si_i    = buf_si + nrows+1;
4271     buf_si[0]   = nrows;
4272     buf_si_i[0] = 0;
4273     nrows = 0;
4274     for (i=owners[proc]; i<owners[proc+1]; i++){
4275       anzi = ai[i+1] - ai[i];
4276       if (anzi) {
4277         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4278         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4279         nrows++;
4280       }
4281     }
4282     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4283     k++;
4284     buf_si += len_si[proc];
4285   }
4286 
4287   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4288   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4289 
4290   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4291   for (i=0; i<merge->nrecv; i++){
4292     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4293   }
4294 
4295   ierr = PetscFree(len_si);CHKERRQ(ierr);
4296   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4297   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4298   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4299   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4300   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4301   ierr = PetscFree(status);CHKERRQ(ierr);
4302 
4303   /* compute a local seq matrix in each processor */
4304   /*----------------------------------------------*/
4305   /* allocate bi array and free space for accumulating nonzero column info */
4306   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4307   bi[0] = 0;
4308 
4309   /* create and initialize a linked list */
4310   nlnk = N+1;
4311   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4312 
4313   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4314   len  = ai[owners[rank+1]] - ai[owners[rank]]; /* num of local nonzeros in seqmat */
4316   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4317   current_space = free_space;
4318 
4319   /* determine symbolic info for each local row */
4320   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4321   nextrow = buf_ri_k + merge->nrecv;
4322   nextai  = nextrow + merge->nrecv;
4323   for (k=0; k<merge->nrecv; k++){
4324     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4325     nrows = *buf_ri_k[k];
4326     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4327     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4328   }
4329 
4330   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4331   len = 0;
4332   for (i=0;i<m;i++) {
4333     bnzi   = 0;
4334     /* add local non-zero cols of this proc's seqmat into lnk */
4335     arow   = owners[rank] + i;
4336     anzi   = ai[arow+1] - ai[arow];
4337     aj     = a->j + ai[arow];
4338     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4339     bnzi += nlnk;
4340     /* add received col data into lnk */
4341     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4342       if (i == *nextrow[k]) { /* i-th row */
4343         anzi = *(nextai[k]+1) - *nextai[k];
4344         aj   = buf_rj[k] + *nextai[k];
4345         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4346         bnzi += nlnk;
4347         nextrow[k]++; nextai[k]++;
4348       }
4349     }
4350     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4351 
4352     /* if free space is not available, make more free space */
4353     if (current_space->local_remaining<bnzi) {
4354       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4355       nspacedouble++;
4356     }
4357     /* copy data into free space, then initialize lnk */
4358     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4359     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4360 
4361     current_space->array           += bnzi;
4362     current_space->local_used      += bnzi;
4363     current_space->local_remaining -= bnzi;
4364 
4365     bi[i+1] = bi[i] + bnzi;
4366   }
4367 
4368   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4369 
4370   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4371   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4372   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4373 
4374   /* create symbolic parallel matrix B_mpi */
4375   /*---------------------------------------*/
4376   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4377   if (n==PETSC_DECIDE) {
4378     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4379   } else {
4380     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4381   }
4382   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4383   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4384   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4385 
4386   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4387   B_mpi->assembled     = PETSC_FALSE;
4388   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4389   merge->bi            = bi;
4390   merge->bj            = bj;
4391   merge->buf_ri        = buf_ri;
4392   merge->buf_rj        = buf_rj;
4393   merge->coi           = PETSC_NULL;
4394   merge->coj           = PETSC_NULL;
4395   merge->owners_co     = PETSC_NULL;
4396 
4397   /* attach the supporting struct to B_mpi for reuse */
4398   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4399   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4400   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4401   *mpimat = B_mpi;
4402 
4403   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4404   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4405   PetscFunctionReturn(0);
4406 }
4407 
4408 #undef __FUNCT__
4409 #define __FUNCT__ "MatMerge_SeqsToMPI"
4410 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4411 {
4412   PetscErrorCode   ierr;
4413 
4414   PetscFunctionBegin;
4415   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4416   if (scall == MAT_INITIAL_MATRIX){
4417     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4418   }
4419   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4420   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4421   PetscFunctionReturn(0);
4422 }
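/*
   Example usage of MatMerge_SeqsToMPI() (a sketch only, not compiled here; the
   sizes and preallocation numbers are placeholders).  Every process builds a
   sequential matrix with the same global dimensions and the pieces are summed
   into one parallel MPIAIJ matrix; see the Notes in the manual page above for
   the ownership of seqmat.

     Mat            seqmat,mpimat;
     PetscErrorCode ierr;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,100,100,5,PETSC_NULL,&seqmat);CHKERRQ(ierr);
     .... fill seqmat with MatSetValues() ....
     ierr = MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     .... use mpimat; entries contributed by several processes have been added together ....
     ierr = MatDestroy(mpimat);CHKERRQ(ierr);
*/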
4423 
4424 #undef __FUNCT__
4425 #define __FUNCT__ "MatGetLocalMat"
4426 /*@
4427      MatGetLocalMat - Creates a SeqAIJ matrix from all the local rows of a parallel (MPIAIJ) matrix
4428 
4429     Not Collective
4430 
4431    Input Parameters:
4432 +    A - the matrix
4433 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4434 
4435    Output Parameter:
4436 .    A_loc - the local sequential matrix generated
4437 
4438     Level: developer
4439 
4440 @*/
4441 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4442 {
4443   PetscErrorCode  ierr;
4444   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4445   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4446   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4447   MatScalar       *aa=a->a,*ba=b->a,*cam;
4448   PetscScalar     *ca;
4449   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4450   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4451 
4452   PetscFunctionBegin;
4453   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4454   if (scall == MAT_INITIAL_MATRIX){
4455     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4456     ci[0] = 0;
4457     for (i=0; i<am; i++){
4458       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4459     }
4460     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4461     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4462     k = 0;
4463     for (i=0; i<am; i++) {
4464       ncols_o = bi[i+1] - bi[i];
4465       ncols_d = ai[i+1] - ai[i];
4466       /* off-diagonal portion of A (cols left of the diagonal block) */
4467       for (jo=0; jo<ncols_o; jo++) {
4468         col = cmap[*bj];
4469         if (col >= cstart) break;
4470         cj[k]   = col; bj++;
4471         ca[k++] = *ba++;
4472       }
4473       /* diagonal portion of A */
4474       for (j=0; j<ncols_d; j++) {
4475         cj[k]   = cstart + *aj++;
4476         ca[k++] = *aa++;
4477       }
4478       /* off-diagonal portion of A (remaining cols, right of the diagonal block) */
4479       for (j=jo; j<ncols_o; j++) {
4480         cj[k]   = cmap[*bj++];
4481         ca[k++] = *ba++;
4482       }
4483     }
4484     /* put together the new matrix */
4485     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4486     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4487     /* Since these are PETSc arrays, change flags to free them as necessary. */
4488     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4489     mat->free_a  = PETSC_TRUE;
4490     mat->free_ij = PETSC_TRUE;
4491     mat->nonew   = 0;
4492   } else if (scall == MAT_REUSE_MATRIX){
4493     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4494     ci = mat->i; cj = mat->j; cam = mat->a;
4495     for (i=0; i<am; i++) {
4496       /* off-diagonal portion of A (cols left of the diagonal block) */
4497       ncols_o = bi[i+1] - bi[i];
4498       for (jo=0; jo<ncols_o; jo++) {
4499         col = cmap[*bj];
4500         if (col >= cstart) break;
4501         *cam++ = *ba++; bj++;
4502       }
4503       /* diagonal portion of A */
4504       ncols_d = ai[i+1] - ai[i];
4505       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4506       /* off-diagonal portion of A (remaining cols, right of the diagonal block) */
4507       for (j=jo; j<ncols_o; j++) {
4508         *cam++ = *ba++; bj++;
4509       }
4510     }
4511   } else {
4512     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4513   }
4514 
4515   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4516   PetscFunctionReturn(0);
4517 }
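/*
   A minimal usage sketch for MatGetLocalMat() (illustrative only, not compiled
   here): A is an assembled MPIAIJ matrix whose local rows are extracted as a
   SeqAIJ matrix, refreshed later with MAT_REUSE_MATRIX when only A's numerical
   values change, and destroyed when done.

     Mat            Aloc;
     PetscErrorCode ierr;

     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     .... work with Aloc ....
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/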
4518 
4519 #undef __FUNCT__
4520 #define __FUNCT__ "MatGetLocalMatCondensed"
4521 /*@C
4522      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and NON-ZERO columns of a parallel (MPIAIJ) matrix
4523 
4524     Not Collective
4525 
4526    Input Parameters:
4527 +    A - the matrix
4528 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4529 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4530 
4531    Output Parameter:
4532 .    A_loc - the local sequential matrix generated
4533 
4534     Level: developer
4535 
4536 @*/
4537 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4538 {
4539   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4540   PetscErrorCode    ierr;
4541   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4542   IS                isrowa,iscola;
4543   Mat               *aloc;
4544 
4545   PetscFunctionBegin;
4546   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4547   if (!row){
4548     start = A->rmap->rstart; end = A->rmap->rend;
4549     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4550   } else {
4551     isrowa = *row;
4552   }
4553   if (!col){
4554     start = A->cmap->rstart;
4555     cmap  = a->garray;
4556     nzA   = a->A->cmap->n;
4557     nzB   = a->B->cmap->n;
4558     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4559     ncols = 0;
4560     for (i=0; i<nzB; i++) {
4561       if (cmap[i] < start) idx[ncols++] = cmap[i];
4562       else break;
4563     }
4564     imark = i;
4565     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4566     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4567     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4568     ierr = PetscFree(idx);CHKERRQ(ierr);
4569   } else {
4570     iscola = *col;
4571   }
4572   if (scall != MAT_INITIAL_MATRIX){
4573     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4574     aloc[0] = *A_loc;
4575   }
4576   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4577   *A_loc = aloc[0];
4578   ierr = PetscFree(aloc);CHKERRQ(ierr);
4579   if (!row){
4580     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4581   }
4582   if (!col){
4583     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4584   }
4585   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4586   PetscFunctionReturn(0);
4587 }
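/*
   Usage sketch for MatGetLocalMatCondensed() (illustrative only): A is an
   assembled MPIAIJ matrix; passing PETSC_NULL for the row and column index
   sets makes the routine take all local rows and the nonzero columns of A.

     Mat            Aloc;
     PetscErrorCode ierr;

     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
     .... work with the condensed local matrix Aloc ....
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/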
4588 
4589 #undef __FUNCT__
4590 #define __FUNCT__ "MatGetBrowsOfAcols"
4591 /*@C
4592     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4593 
4594     Collective on Mat
4595 
4596    Input Parameters:
4597 +    A,B - the matrices in mpiaij format
4598 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4599 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4600 
4601    Output Parameter:
4602 +    rowb, colb - index sets of rows and columns of B to extract
4603 .    brstart - row index of B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq come from B's local rows)
4604 -    B_seq - the sequential matrix generated
4605 
4606     Level: developer
4607 
4608 @*/
4609 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4610 {
4611   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4612   PetscErrorCode    ierr;
4613   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4614   IS                isrowb,iscolb;
4615   Mat               *bseq;
4616 
4617   PetscFunctionBegin;
4618   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4619     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4620   }
4621   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4622 
4623   if (scall == MAT_INITIAL_MATRIX){
4624     start = A->cmap->rstart;
4625     cmap  = a->garray;
4626     nzA   = a->A->cmap->n;
4627     nzB   = a->B->cmap->n;
4628     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4629     ncols = 0;
4630     for (i=0; i<nzB; i++) {  /* global rows below this process's local block */
4631       if (cmap[i] < start) idx[ncols++] = cmap[i];
4632       else break;
4633     }
4634     imark = i;
4635     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4636     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows above this process's local block */
4637     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4638     ierr = PetscFree(idx);CHKERRQ(ierr);
4639     *brstart = imark;
4640     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4641   } else {
4642     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4643     isrowb = *rowb; iscolb = *colb;
4644     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4645     bseq[0] = *B_seq;
4646   }
4647   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4648   *B_seq = bseq[0];
4649   ierr = PetscFree(bseq);CHKERRQ(ierr);
4650   if (!rowb){
4651     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4652   } else {
4653     *rowb = isrowb;
4654   }
4655   if (!colb){
4656     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4657   } else {
4658     *colb = iscolb;
4659   }
4660   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4661   PetscFunctionReturn(0);
4662 }
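/*
   Usage sketch for MatGetBrowsOfAcols() (illustrative only): A and B are
   assembled MPIAIJ matrices.  The first call creates the index sets and B_seq;
   keeping rowb and colb allows a cheaper MAT_REUSE_MATRIX call when B's values
   change but its nonzero pattern does not.

     Mat            Bseq;
     IS             rowb,colb;
     PetscInt       brstart;
     PetscErrorCode ierr;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     .... later, after B's numerical values change ....
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(Bseq);CHKERRQ(ierr);
*/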
4663 
4664 #undef __FUNCT__
4665 #define __FUNCT__ "MatGetBrowsOfAoCols"
4666 /*@C
4667     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4668     of the OFF-DIAGONAL portion of the local part of A
4669 
4670     Collective on Mat
4671 
4672    Input Parameters:
4673 +    A,B - the matrices in mpiaij format
4674 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4675 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4676 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4677 
4678    Output Parameter:
4679 .    B_oth - the sequential matrix generated
4680 
4681     Level: developer
4682 
4683 @*/
4684 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4685 {
4686   VecScatter_MPI_General *gen_to,*gen_from;
4687   PetscErrorCode         ierr;
4688   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4689   Mat_SeqAIJ             *b_oth;
4690   VecScatter             ctx=a->Mvctx;
4691   MPI_Comm               comm=((PetscObject)ctx)->comm;
4692   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4693   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4694   PetscScalar            *rvalues,*svalues;
4695   MatScalar              *b_otha,*bufa,*bufA;
4696   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4697   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4698   MPI_Status             *sstatus,rstatus;
4699   PetscMPIInt            jj;
4700   PetscInt               *cols,sbs,rbs;
4701   PetscScalar            *vals;
4702 
4703   PetscFunctionBegin;
4704   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4705     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4706   }
4707   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4708   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4709 
4710   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4711   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4712   rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
4713   svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
4714   nrecvs   = gen_from->n;
4715   nsends   = gen_to->n;
4716 
4717   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4718   srow     = gen_to->indices;   /* local row index to be sent */
4719   sstarts  = gen_to->starts;
4720   sprocs   = gen_to->procs;
4721   sstatus  = gen_to->sstatus;
4722   sbs      = gen_to->bs;
4723   rstarts  = gen_from->starts;
4724   rprocs   = gen_from->procs;
4725   rbs      = gen_from->bs;
4726 
4727   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4728   if (scall == MAT_INITIAL_MATRIX){
4729     /* i-array */
4730     /*---------*/
4731     /*  post receives */
4732     for (i=0; i<nrecvs; i++){
4733       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4734       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4735       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4736     }
4737 
4738     /* pack the outgoing message */
4739     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4740     rstartsj = sstartsj + nsends +1;
4741     sstartsj[0] = 0;  rstartsj[0] = 0;
4742     len = 0; /* total length of j or a array to be sent */
4743     k = 0;
4744     for (i=0; i<nsends; i++){
4745       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4746       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4747       for (j=0; j<nrows; j++) {
4748         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4749         for (l=0; l<sbs; l++){
4750           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4751           rowlen[j*sbs+l] = ncols;
4752           len += ncols;
4753           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4754         }
4755         k++;
4756       }
4757       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4758       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4759     }
4760     /* recvs and sends of i-array are completed */
4761     i = nrecvs;
4762     while (i--) {
4763       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4764     }
4765     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4766 
4767     /* allocate buffers for sending j and a arrays */
4768     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4769     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4770 
4771     /* create i-array of B_oth */
4772     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4773     b_othi[0] = 0;
4774     len = 0; /* total length of j or a array to be received */
4775     k = 0;
4776     for (i=0; i<nrecvs; i++){
4777       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4778       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4779       for (j=0; j<nrows; j++) {
4780         b_othi[k+1] = b_othi[k] + rowlen[j];
4781         len += rowlen[j]; k++;
4782       }
4783       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4784     }
4785 
4786     /* allocate space for j and a arrays of B_oth */
4787     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4788     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4789 
4790     /* j-array */
4791     /*---------*/
4792     /*  post receives of j-array */
4793     for (i=0; i<nrecvs; i++){
4794       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4795       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4796     }
4797 
4798     /* pack the outgoing message j-array */
4799     k = 0;
4800     for (i=0; i<nsends; i++){
4801       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4802       bufJ = bufj+sstartsj[i];
4803       for (j=0; j<nrows; j++) {
4804         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4805         for (ll=0; ll<sbs; ll++){
4806           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4807           for (l=0; l<ncols; l++){
4808             *bufJ++ = cols[l];
4809           }
4810           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4811         }
4812       }
4813       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4814     }
4815 
4816     /* recvs and sends of j-array are completed */
4817     i = nrecvs;
4818     while (i--) {
4819       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4820     }
4821     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4822   } else if (scall == MAT_REUSE_MATRIX){
4823     sstartsj = *startsj;
4824     rstartsj = sstartsj + nsends +1;
4825     bufa     = *bufa_ptr;
4826     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4827     b_otha   = b_oth->a;
4828   } else {
4829     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4830   }
4831 
4832   /* a-array */
4833   /*---------*/
4834   /*  post receives of a-array */
4835   for (i=0; i<nrecvs; i++){
4836     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4837     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4838   }
4839 
4840   /* pack the outgoing message a-array */
4841   k = 0;
4842   for (i=0; i<nsends; i++){
4843     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4844     bufA = bufa+sstartsj[i];
4845     for (j=0; j<nrows; j++) {
4846       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4847       for (ll=0; ll<sbs; ll++){
4848         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4849         for (l=0; l<ncols; l++){
4850           *bufA++ = vals[l];
4851         }
4852         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4853       }
4854     }
4855     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4856   }
4857   /* recvs and sends of a-array are completed */
4858   i = nrecvs;
4859   while (i--) {
4860     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4861   }
4862   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4863   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4864 
4865   if (scall == MAT_INITIAL_MATRIX){
4866     /* put together the new matrix */
4867     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4868 
4869     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4870     /* Since these are PETSc arrays, change flags to free them as necessary. */
4871     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4872     b_oth->free_a  = PETSC_TRUE;
4873     b_oth->free_ij = PETSC_TRUE;
4874     b_oth->nonew   = 0;
4875 
4876     ierr = PetscFree(bufj);CHKERRQ(ierr);
4877     if (!startsj || !bufa_ptr){
4878       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4879       ierr = PetscFree(bufa);CHKERRQ(ierr);
4880     } else {
4881       *startsj  = sstartsj;
4882       *bufa_ptr = bufa;
4883     }
4884   }
4885   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4886   PetscFunctionReturn(0);
4887 }
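/*
   Usage sketch for MatGetBrowsOfAoCols() (illustrative only): A and B are
   assembled MPIAIJ matrices.  startsj and bufa are filled on the first call and
   must be passed back unchanged for a MAT_REUSE_MATRIX call; passing PETSC_NULL
   for either of them forces the MAT_INITIAL_MATRIX path.

     Mat            Both;
     PetscInt       *startsj = PETSC_NULL;
     MatScalar      *bufa    = PETSC_NULL;
     PetscErrorCode ierr;

     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
     .... later, after B's numerical values change ....
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
     ierr = PetscFree(startsj);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(Both);CHKERRQ(ierr);
*/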
4888 
4889 #undef __FUNCT__
4890 #define __FUNCT__ "MatGetCommunicationStructs"
4891 /*@C
4892   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4893 
4894   Not Collective
4895 
4896   Input Parameters:
4897 . A - The matrix in mpiaij format
4898 
4899   Output Parameter:
4900 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4901 . colmap - A map from global column index to local index into lvec
4902 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4903 
4904   Level: developer
4905 
4906 @*/
4907 #if defined (PETSC_USE_CTABLE)
4908 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4909 #else
4910 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4911 #endif
4912 {
4913   Mat_MPIAIJ *a;
4914 
4915   PetscFunctionBegin;
4916   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4917   PetscValidPointer(lvec, 2);
4918   PetscValidPointer(colmap, 3);
4919   PetscValidPointer(multScatter, 4);
4920   a = (Mat_MPIAIJ *) A->data;
4921   if (lvec) *lvec = a->lvec;
4922   if (colmap) *colmap = a->colmap;
4923   if (multScatter) *multScatter = a->Mvctx;
4924   PetscFunctionReturn(0);
4925 }
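/*
   Usage sketch for MatGetCommunicationStructs() (illustrative only): A is an
   assembled MPIAIJ matrix and x a parallel vector with A's column layout.  The
   sketch fetches the scatter and local work vector that MatMult() uses and runs
   the same communication by hand.  The returned objects are owned by A and must
   not be destroyed by the caller; the type of colmap depends on PETSC_USE_CTABLE.

     Vec            lvec;
     VecScatter     Mvctx;
     PetscErrorCode ierr;
   #if defined(PETSC_USE_CTABLE)
     PetscTable     colmap;
   #else
     PetscInt       *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
*/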
4926 
4927 EXTERN_C_BEGIN
4928 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4929 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4930 EXTERN_C_END
4931 
4932 #include "../src/mat/impls/dense/mpi/mpidense.h"
4933 
4934 #undef __FUNCT__
4935 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4936 /*
4937     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
4938 
4939                n                       p                          p
4940         (              )       (              )         (                  )
4941       m (      A       )  *  n (       B      )   =   m (         C        )
4942         (              )       (              )         (                  )
4943 
4944 */
4945 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4946 {
4947   PetscErrorCode     ierr;
4948   Mat                At,Bt,Ct;
4949 
4950   PetscFunctionBegin;
4951   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4952   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4953   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4954   ierr = MatDestroy(At);CHKERRQ(ierr);
4955   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4956   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4957   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4958   PetscFunctionReturn(0);
4959 }
4960 
4961 #undef __FUNCT__
4962 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4963 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4964 {
4965   PetscErrorCode ierr;
4966   PetscInt       m=A->rmap->n,n=B->cmap->n;
4967   Mat            Cmat;
4968 
4969   PetscFunctionBegin;
4970   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4971   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4972   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4973   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4974   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4975   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4976   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4977   *C   = Cmat;
4978   PetscFunctionReturn(0);
4979 }
4980 
4981 /* ----------------------------------------------------------------*/
4982 #undef __FUNCT__
4983 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4984 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4985 {
4986   PetscErrorCode ierr;
4987 
4988   PetscFunctionBegin;
4989   if (scall == MAT_INITIAL_MATRIX){
4990     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4991   }
4992   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4993   PetscFunctionReturn(0);
4994 }
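/*
   Usage sketch (illustrative only): this kernel is normally reached through the
   generic MatMatMult() interface when A is MPIDENSE and B is MPIAIJ.

     Mat            C;
     PetscErrorCode ierr;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     .... C is an MPIDENSE matrix holding A*B ....
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/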
4995 
4996 EXTERN_C_BEGIN
4997 #if defined(PETSC_HAVE_MUMPS)
4998 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4999 #endif
5000 #if defined(PETSC_HAVE_PASTIX)
5001 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5002 #endif
5003 #if defined(PETSC_HAVE_SUPERLU_DIST)
5004 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5005 #endif
5006 #if defined(PETSC_HAVE_SPOOLES)
5007 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5008 #endif
5009 EXTERN_C_END
5010 
5011 /*MC
5012    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5013 
5014    Options Database Keys:
5015 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5016 
5017   Level: beginner
5018 
5019 .seealso: MatCreateMPIAIJ()
5020 M*/
5021 
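/*
   A small creation sketch for this matrix type (illustrative only; the global
   size 100 and the preallocation numbers 5 and 2 are placeholders):

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     .... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ....

   Alternatively call MatSetFromOptions(A) in place of MatSetType() and select
   the type at run time with -mat_type mpiaij.
*/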
5022 EXTERN_C_BEGIN
5023 #undef __FUNCT__
5024 #define __FUNCT__ "MatCreate_MPIAIJ"
5025 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5026 {
5027   Mat_MPIAIJ     *b;
5028   PetscErrorCode ierr;
5029   PetscMPIInt    size;
5030 
5031   PetscFunctionBegin;
5032   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5033 
5034   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5035   B->data         = (void*)b;
5036   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5037   B->rmap->bs      = 1;
5038   B->assembled    = PETSC_FALSE;
5039   B->mapping      = 0;
5040 
5041   B->insertmode      = NOT_SET_VALUES;
5042   b->size            = size;
5043   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5044 
5045   /* build cache for off array entries formed */
5046   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5047   b->donotstash  = PETSC_FALSE;
5048   b->colmap      = 0;
5049   b->garray      = 0;
5050   b->roworiented = PETSC_TRUE;
5051 
5052   /* stuff used for matrix vector multiply */
5053   b->lvec      = PETSC_NULL;
5054   b->Mvctx     = PETSC_NULL;
5055 
5056   /* stuff for MatGetRow() */
5057   b->rowindices   = 0;
5058   b->rowvalues    = 0;
5059   b->getrowactive = PETSC_FALSE;
5060 
5061 #if defined(PETSC_HAVE_SPOOLES)
5062   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
5063                                      "MatGetFactor_mpiaij_spooles",
5064                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5065 #endif
5066 #if defined(PETSC_HAVE_MUMPS)
5067   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
5068                                      "MatGetFactor_mpiaij_mumps",
5069                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5070 #endif
5071 #if defined(PETSC_HAVE_PASTIX)
5072   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
5073 					   "MatGetFactor_mpiaij_pastix",
5074 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5075 #endif
5076 #if defined(PETSC_HAVE_SUPERLU_DIST)
5077   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
5078                                      "MatGetFactor_mpiaij_superlu_dist",
5079                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5080 #endif
5081   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5082                                      "MatStoreValues_MPIAIJ",
5083                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5084   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5085                                      "MatRetrieveValues_MPIAIJ",
5086                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5087   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5088 				     "MatGetDiagonalBlock_MPIAIJ",
5089                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5090   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5091 				     "MatIsTranspose_MPIAIJ",
5092 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5093   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5094 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5095 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5096   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5097 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5098 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5099   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5100 				     "MatDiagonalScaleLocal_MPIAIJ",
5101 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5102   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5103                                      "MatConvert_MPIAIJ_MPICSRPERM",
5104                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5105   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5106                                      "MatConvert_MPIAIJ_MPICRL",
5107                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5108   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5109                                      "MatMatMult_MPIDense_MPIAIJ",
5110                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5111   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5112                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5113                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5114   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5115                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5116                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5117   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5118   PetscFunctionReturn(0);
5119 }
5120 EXTERN_C_END
5121 
5122 #undef __FUNCT__
5123 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5124 /*@
5125      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5126          and "off-diagonal" part of the matrix in CSR format.
5127 
5128    Collective on MPI_Comm
5129 
5130    Input Parameters:
5131 +  comm - MPI communicator
5132 .  m - number of local rows (Cannot be PETSC_DECIDE)
5133 .  n - This value should be the same as the local size used in creating the
5134        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5135        calculated if N is given). For square matrices n is almost always m.
5136 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5137 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5138 .   i - row indices for "diagonal" portion of matrix
5139 .   j - column indices
5140 .   a - matrix values
5141 .   oi - row indices for "off-diagonal" portion of matrix
5142 .   oj - column indices
5143 -   oa - matrix values
5144 
5145    Output Parameter:
5146 .   mat - the matrix
5147 
5148    Level: advanced
5149 
5150    Notes:
5151        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5152 
5153        The i and j indices are 0 based
5154 
5155        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5156 
5157 
5158 .keywords: matrix, aij, compressed row, sparse, parallel
5159 
5160 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5161           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5162 @*/
5163 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5164 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5165 {
5166   PetscErrorCode ierr;
5167   Mat_MPIAIJ     *maij;
5168 
5169   PetscFunctionBegin;
5170   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5171   if (i[0]) {
5172     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5173   }
5174   if (oi[0]) {
5175     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5176   }
5177   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5178   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5179   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5180   maij = (Mat_MPIAIJ*) (*mat)->data;
5181   maij->donotstash     = PETSC_TRUE;
5182   (*mat)->preallocated = PETSC_TRUE;
5183 
5184   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5185   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5186   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5187   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5188 
5189   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5190   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5191 
5192   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5193   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5194   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5195   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5196 
5197   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5198   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5199   PetscFunctionReturn(0);
5200 }
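/*
   Usage sketch for MatCreateMPIAIJWithSplitArrays() (illustrative only): the
   names di,dj,da and oi,oj,oa stand for caller-built, 0-based CSR arrays for
   the "diagonal" and "off-diagonal" blocks of the m local rows; the caller
   keeps ownership of all six arrays, which must remain valid for the life of
   the matrix.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,M,N,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
     .... use A; destroying A does not free di,dj,da,oi,oj,oa ....
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/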
5201 
5202 /*
5203     Special version for direct calls from Fortran
5204 */
5205 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5206 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5207 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5208 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5209 #endif
5210 
5211 /* Change these macros so they can be used in a void function */
5212 #undef CHKERRQ
5213 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5214 #undef SETERRQ2
5215 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5216 #undef SETERRQ
5217 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5218 
5219 EXTERN_C_BEGIN
5220 #undef __FUNCT__
5221 #define __FUNCT__ "matsetvaluesmpiaij_"
5222 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5223 {
5224   Mat             mat = *mmat;
5225   PetscInt        m = *mm, n = *mn;
5226   InsertMode      addv = *maddv;
5227   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5228   PetscScalar     value;
5229   PetscErrorCode  ierr;
5230 
5231   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5232   if (mat->insertmode == NOT_SET_VALUES) {
5233     mat->insertmode = addv;
5234   }
5235 #if defined(PETSC_USE_DEBUG)
5236   else if (mat->insertmode != addv) {
5237     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5238   }
5239 #endif
5240   {
5241   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5242   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5243   PetscTruth      roworiented = aij->roworiented;
5244 
5245   /* Some Variables required in the macro */
5246   Mat             A = aij->A;
5247   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5248   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5249   MatScalar       *aa = a->a;
5250   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5251   Mat             B = aij->B;
5252   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5253   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5254   MatScalar       *ba = b->a;
5255 
5256   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5257   PetscInt        nonew = a->nonew;
5258   MatScalar       *ap1,*ap2;
5259 
5260   PetscFunctionBegin;
5261   for (i=0; i<m; i++) {
5262     if (im[i] < 0) continue;
5263 #if defined(PETSC_USE_DEBUG)
5264     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5265 #endif
5266     if (im[i] >= rstart && im[i] < rend) {
5267       row      = im[i] - rstart;
5268       lastcol1 = -1;
5269       rp1      = aj + ai[row];
5270       ap1      = aa + ai[row];
5271       rmax1    = aimax[row];
5272       nrow1    = ailen[row];
5273       low1     = 0;
5274       high1    = nrow1;
5275       lastcol2 = -1;
5276       rp2      = bj + bi[row];
5277       ap2      = ba + bi[row];
5278       rmax2    = bimax[row];
5279       nrow2    = bilen[row];
5280       low2     = 0;
5281       high2    = nrow2;
5282 
5283       for (j=0; j<n; j++) {
5284         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5285         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5286         if (in[j] >= cstart && in[j] < cend){
5287           col = in[j] - cstart;
5288           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5289         } else if (in[j] < 0) continue;
5290 #if defined(PETSC_USE_DEBUG)
5291         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5292 #endif
5293         else {
5294           if (mat->was_assembled) {
5295             if (!aij->colmap) {
5296               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5297             }
5298 #if defined (PETSC_USE_CTABLE)
5299             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5300 	    col--;
5301 #else
5302             col = aij->colmap[in[j]] - 1;
5303 #endif
5304             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5305               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5306               col =  in[j];
5307               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5308               B = aij->B;
5309               b = (Mat_SeqAIJ*)B->data;
5310               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5311               rp2      = bj + bi[row];
5312               ap2      = ba + bi[row];
5313               rmax2    = bimax[row];
5314               nrow2    = bilen[row];
5315               low2     = 0;
5316               high2    = nrow2;
5317               bm       = aij->B->rmap->n;
5318               ba = b->a;
5319             }
5320           } else col = in[j];
5321           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5322         }
5323       }
5324     } else {
5325       if (!aij->donotstash) {
5326         if (roworiented) {
5327           if (ignorezeroentries && v[i*n] == 0.0) continue;
5328           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5329         } else {
5330           if (ignorezeroentries && v[i] == 0.0) continue;
5331           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5332         }
5333       }
5334     }
5335   }}
5336   PetscFunctionReturnVoid();
5337 }
5338 EXTERN_C_END
5339 
5340