xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fddf0933058fbad754b3e36ebcce44b378b6bc5d)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "../src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. The code is adapted from
10     MatLoad_MPIAIJ(); the lack of reuse is unfortunate, and ideally there would be such a routine for each matrix type.
11 
12     Only for square matrices
13 */
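/*
    A minimal usage sketch (hypothetical caller, not part of this file). It assumes rank 0
    already holds the full matrix as a SeqAIJ, that every rank passes a local row count m
    whose sum over the communicator equals the global size M, and that ierr, m, M and the
    preallocation array nnz are set up by the caller:

       Mat         gmat = PETSC_NULL,pmat;
       PetscMPIInt rank;

       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
       if (!rank) {   // assemble or load the global matrix sequentially on rank 0 only
         ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,M,0,nnz,&gmat);CHKERRQ(ierr);
         // ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() on gmat ...
       }
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&pmat);CHKERRQ(ierr);
       // later, if only the numerical values of gmat changed on rank 0, refresh them in place:
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&pmat);CHKERRQ(ierr);
*/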
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send each process the lengths of its rows */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of off-diagonal-block entries in each row (and how many lie left of the diagonal block) */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of off-diagonal-block entries in each row (and how many lie left of the diagonal block) */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
181 holds an order-N integer array) but access is fast.
182 */
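/*
  For example (illustrative values only): if this process's off-diagonal block has
  garray = {3, 9, 17}, the loop below produces colmap[3] = 1, colmap[9] = 2 and
  colmap[17] = 3 (all other entries 0), so local column = colmap[global] - 1 and a
  stored value of 0 means "global column not present in the off-diagonal block".
  With PETSC_USE_CTABLE the same 1-based pairs (global+1 -> local+1) are stored in a
  hash table instead of a dense length-N array.
*/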
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
207 #define CHUNKSIZE   15
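/*
   The two macros below insert (or add to) a single entry in a row of the local diagonal (A)
   or off-diagonal (B) SeqAIJ block. Each first narrows the search window with a short
   binary search (while the window holds more than 5 entries), finishes with a linear scan,
   and then either updates an existing entry or shifts the rest of the row up to make room,
   calling MatSeqXAIJReallocateAIJ() when the row is already full. The low/high/lastcol
   variables carry the previous search window so that monotonically increasing column
   indices within a row are located quickly.
*/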
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
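  /* the input array v is assumed to hold the values of the entire row ordered by increasing
     global column number: first the entries left of the diagonal block, then the diagonal
     block itself, then the entries to its right; the copies below split it accordingly */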
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some Variables required in the macro */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
415   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are either in INSERTMODE or ADDMODE */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled; if so, we must
517      also disassemble ourselves so that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of number of messages and max length*/
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
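  /*
      For example (illustrative counts only): with 3 processes and per-destination row counts
      nprocs[2*i] = {2,0,3}, the offsets below become starts[] = {0,2,2}, so the rows destined
      for process 0 occupy svalues[0..1] and the rows for process 2 occupy svalues[2..4].
  */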
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal entry, which would
674      repeatedly allocate new memory (many mallocs) and slow things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts: note this assumes the values are not actually */
764     /* added into yy until the VecScatterEnd() on the next line */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* values actually were received in the Begin() but we need to call this nop */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap->N;
932     header[2] = mat->cmap->N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap->n;
938     range = mat->rmap->range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap->n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap->n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
968   nzmax = nz; /* process 0 needs as much space as the largest amount any process needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap->n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap->n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap->N > 1024) {
1114       PetscTruth flg = PETSC_FALSE;
1115 
1116       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1117       if (!flg) {
1118         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1119       }
1120     }
1121 
1122     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1123     if (!rank) {
1124       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1125     } else {
1126       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1127     }
1128     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1129     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1130     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1131     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1132 
1133     /* copy over the A part */
1134     Aloc = (Mat_SeqAIJ*)aij->A->data;
1135     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1136     row = mat->rmap->rstart;
1137     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1138     for (i=0; i<m; i++) {
1139       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1140       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1141     }
1142     aj = Aloc->j;
1143     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1144 
1145     /* copy over the B part */
1146     Aloc = (Mat_SeqAIJ*)aij->B->data;
1147     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1148     row  = mat->rmap->rstart;
1149     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1150     ct   = cols;
1151     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1155     }
1156     ierr = PetscFree(ct);CHKERRQ(ierr);
1157     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1158     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1159     /*
1160        Every process has to participate in the call that draws the matrix, since the graphics
1161        waits are synchronized across all processes that share the PetscDraw object
1162     */
1163     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1164     if (!rank) {
1165       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1166       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1167     }
1168     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1169     ierr = MatDestroy(A);CHKERRQ(ierr);
1170   }
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatView_MPIAIJ"
1176 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1177 {
1178   PetscErrorCode ierr;
1179   PetscTruth     iascii,isdraw,issocket,isbinary;
1180 
1181   PetscFunctionBegin;
1182   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1183   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1184   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1185   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1186   if (iascii || isdraw || isbinary || issocket) {
1187     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1188   } else {
1189     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1190   }
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatRelax_MPIAIJ"
1196 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1197 {
1198   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1199   PetscErrorCode ierr;
1200   Vec            bb1;
1201 
1202   PetscFunctionBegin;
1203   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1204 
1205   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1206     if (flag & SOR_ZERO_INITIAL_GUESS) {
1207       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1208       its--;
1209     }
1210 
1211     while (its--) {
1212       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1213       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1214 
1215       /* update rhs: bb1 = bb - B*x */
1216       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1217       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1218 
1219       /* local sweep */
1220       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1221     }
1222   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1223     if (flag & SOR_ZERO_INITIAL_GUESS) {
1224       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1225       its--;
1226     }
1227     while (its--) {
1228       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230 
1231       /* update rhs: bb1 = bb - B*x */
1232       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1233       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1234 
1235       /* local sweep */
1236       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1237     }
1238   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1239     if (flag & SOR_ZERO_INITIAL_GUESS) {
1240       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1241       its--;
1242     }
1243     while (its--) {
1244       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246 
1247       /* update rhs: bb1 = bb - B*x */
1248       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1249       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1250 
1251       /* local sweep */
1252       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1253     }
1254   } else {
1255     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1256   }
1257 
1258   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatPermute_MPIAIJ"
1264 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1265 {
1266   MPI_Comm       comm,pcomm;
1267   PetscInt       first,local_size,nrows;
1268   const PetscInt *rows;
1269   PetscMPIInt    size;
1270   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1275   /* make a collective version of 'rowp' */
1276   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1277   if (pcomm==comm) {
1278     crowp = rowp;
1279   } else {
1280     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1281     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1282     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1283     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1284   }
1285   /* collect the global row permutation and invert it */
1286   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1287   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1288   if (pcomm!=comm) {
1289     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1290   }
1291   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1292   /* get the local target indices */
1293   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1294   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1295   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1296   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1297   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1298   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1299   /* the column permutation is simpler:
1300      make a local version of 'colp' and invert it */
1301   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1302   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1303   if (size==1) {
1304     lcolp = colp;
1305   } else {
1306     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1307     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1308     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1309   }
1310   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1311   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1312   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1313   if (size>1) {
1314     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1315     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1316   }
1317   /* now we just get the submatrix */
1318   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1319   /* clean up */
1320   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1321   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
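/*
   Illustrative usage sketch (not part of the original source): calling MatPermute() on a
   hypothetical square, assembled MPIAIJ matrix 'A'. The permutations built here with
   ISCreateStride() are just identity placeholders over the local ownership range; real
   applications would supply their own row and column orderings.

     Mat            Aperm;
     IS             rowperm,colperm;
     PetscInt       rstart,rend;
     PetscErrorCode ierr;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&rowperm);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&colperm);CHKERRQ(ierr);
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(rowperm);CHKERRQ(ierr);
     ierr = ISDestroy(colperm);CHKERRQ(ierr);
*/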
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1327 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1328 {
1329   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1330   Mat            A = mat->A,B = mat->B;
1331   PetscErrorCode ierr;
1332   PetscReal      isend[5],irecv[5];
1333 
1334   PetscFunctionBegin;
1335   info->block_size     = 1.0;
1336   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1337   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1338   isend[3] = info->memory;  isend[4] = info->mallocs;
1339   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1340   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1341   isend[3] += info->memory;  isend[4] += info->mallocs;
1342   if (flag == MAT_LOCAL) {
1343     info->nz_used      = isend[0];
1344     info->nz_allocated = isend[1];
1345     info->nz_unneeded  = isend[2];
1346     info->memory       = isend[3];
1347     info->mallocs      = isend[4];
1348   } else if (flag == MAT_GLOBAL_MAX) {
1349     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1350     info->nz_used      = irecv[0];
1351     info->nz_allocated = irecv[1];
1352     info->nz_unneeded  = irecv[2];
1353     info->memory       = irecv[3];
1354     info->mallocs      = irecv[4];
1355   } else if (flag == MAT_GLOBAL_SUM) {
1356     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1357     info->nz_used      = irecv[0];
1358     info->nz_allocated = irecv[1];
1359     info->nz_unneeded  = irecv[2];
1360     info->memory       = irecv[3];
1361     info->mallocs      = irecv[4];
1362   }
1363   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1364   info->fill_ratio_needed = 0;
1365   info->factor_mallocs    = 0;
1366 
1367   PetscFunctionReturn(0);
1368 }
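/*
   Illustrative usage sketch (not part of the original source): querying the globally summed
   storage statistics of a hypothetical assembled matrix 'A' with MatGetInfo(); MAT_GLOBAL_MAX
   or MAT_LOCAL may be passed instead to obtain per-process maxima or local values.

     MatInfo        info;
     PetscErrorCode ierr;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g, allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/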
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatSetOption_MPIAIJ"
1372 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1373 {
1374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   switch (op) {
1379   case MAT_NEW_NONZERO_LOCATIONS:
1380   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1381   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1382   case MAT_KEEP_NONZERO_PATTERN:
1383   case MAT_NEW_NONZERO_LOCATION_ERR:
1384   case MAT_USE_INODES:
1385   case MAT_IGNORE_ZERO_ENTRIES:
1386     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1387     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1388     break;
1389   case MAT_ROW_ORIENTED:
1390     a->roworiented = flg;
1391     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1392     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1393     break;
1394   case MAT_NEW_DIAGONALS:
1395     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1396     break;
1397   case MAT_IGNORE_OFF_PROC_ENTRIES:
1398     a->donotstash = PETSC_TRUE;
1399     break;
1400   case MAT_SYMMETRIC:
1401     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1402     break;
1403   case MAT_STRUCTURALLY_SYMMETRIC:
1404   case MAT_HERMITIAN:
1405   case MAT_SYMMETRY_ETERNAL:
1406     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1407     break;
1408   default:
1409     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1410   }
1411   PetscFunctionReturn(0);
1412 }
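/*
   Illustrative usage sketch (not part of the original source): options a hypothetical user
   might set on an MPIAIJ matrix 'A'; each is one of the cases handled in the switch above and
   takes a PetscTruth flag.

     PetscErrorCode ierr;

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
*/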
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatGetRow_MPIAIJ"
1416 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1417 {
1418   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1419   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1420   PetscErrorCode ierr;
1421   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1422   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1423   PetscInt       *cmap,*idx_p;
1424 
1425   PetscFunctionBegin;
1426   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1427   mat->getrowactive = PETSC_TRUE;
1428 
1429   if (!mat->rowvalues && (idx || v)) {
1430     /*
1431         allocate enough space to hold information from the longest row.
1432     */
1433     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1434     PetscInt     max = 1,tmp;
1435     for (i=0; i<matin->rmap->n; i++) {
1436       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1437       if (max < tmp) { max = tmp; }
1438     }
1439     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1440     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1441   }
1442 
1443   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1444   lrow = row - rstart;
1445 
1446   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1447   if (!v)   {pvA = 0; pvB = 0;}
1448   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1449   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1450   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1451   nztot = nzA + nzB;
1452 
1453   cmap  = mat->garray;
1454   if (v  || idx) {
1455     if (nztot) {
1456       /* Sort by increasing column numbers, assuming A and B already sorted */
1457       PetscInt imark = -1;
1458       if (v) {
1459         *v = v_p = mat->rowvalues;
1460         for (i=0; i<nzB; i++) {
1461           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1462           else break;
1463         }
1464         imark = i;
1465         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1466         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1467       }
1468       if (idx) {
1469         *idx = idx_p = mat->rowindices;
1470         if (imark > -1) {
1471           for (i=0; i<imark; i++) {
1472             idx_p[i] = cmap[cworkB[i]];
1473           }
1474         } else {
1475           for (i=0; i<nzB; i++) {
1476             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1477             else break;
1478           }
1479           imark = i;
1480         }
1481         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1482         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1483       }
1484     } else {
1485       if (idx) *idx = 0;
1486       if (v)   *v   = 0;
1487     }
1488   }
1489   *nz = nztot;
1490   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1491   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1497 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1498 {
1499   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1500 
1501   PetscFunctionBegin;
1502   if (!aij->getrowactive) {
1503     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1504   }
1505   aij->getrowactive = PETSC_FALSE;
1506   PetscFunctionReturn(0);
1507 }
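/*
   Illustrative usage sketch (not part of the original source): visiting the locally owned rows
   of a hypothetical assembled MPIAIJ matrix 'A'. MatGetRow() and MatRestoreRow() must be paired
   for every row, and only rows in the local ownership range may be requested (see the range
   check above).

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscErrorCode    ierr;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/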
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatNorm_MPIAIJ"
1511 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1512 {
1513   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1514   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1515   PetscErrorCode ierr;
1516   PetscInt       i,j,cstart = mat->cmap->rstart;
1517   PetscReal      sum = 0.0;
1518   MatScalar      *v;
1519 
1520   PetscFunctionBegin;
1521   if (aij->size == 1) {
1522     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1523   } else {
1524     if (type == NORM_FROBENIUS) {
1525       v = amat->a;
1526       for (i=0; i<amat->nz; i++) {
1527 #if defined(PETSC_USE_COMPLEX)
1528         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1529 #else
1530         sum += (*v)*(*v); v++;
1531 #endif
1532       }
1533       v = bmat->a;
1534       for (i=0; i<bmat->nz; i++) {
1535 #if defined(PETSC_USE_COMPLEX)
1536         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1537 #else
1538         sum += (*v)*(*v); v++;
1539 #endif
1540       }
1541       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1542       *norm = sqrt(*norm);
1543     } else if (type == NORM_1) { /* max column norm */
1544       PetscReal *tmp,*tmp2;
1545       PetscInt  *jj,*garray = aij->garray;
1546       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1547       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1548       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1549       *norm = 0.0;
1550       v = amat->a; jj = amat->j;
1551       for (j=0; j<amat->nz; j++) {
1552         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1553       }
1554       v = bmat->a; jj = bmat->j;
1555       for (j=0; j<bmat->nz; j++) {
1556         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1557       }
1558       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1559       for (j=0; j<mat->cmap->N; j++) {
1560         if (tmp2[j] > *norm) *norm = tmp2[j];
1561       }
1562       ierr = PetscFree(tmp);CHKERRQ(ierr);
1563       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1564     } else if (type == NORM_INFINITY) { /* max row norm */
1565       PetscReal ntemp = 0.0;
1566       for (j=0; j<aij->A->rmap->n; j++) {
1567         v = amat->a + amat->i[j];
1568         sum = 0.0;
1569         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1570           sum += PetscAbsScalar(*v); v++;
1571         }
1572         v = bmat->a + bmat->i[j];
1573         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1574           sum += PetscAbsScalar(*v); v++;
1575         }
1576         if (sum > ntemp) ntemp = sum;
1577       }
1578       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1579     } else {
1580       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1581     }
1582   }
1583   PetscFunctionReturn(0);
1584 }
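/*
   Illustrative usage sketch (not part of the original source): the three norms supported above
   for a hypothetical assembled matrix 'A'. NORM_FROBENIUS is sqrt(sum of |a_ij|^2), NORM_1 is
   the maximum column sum, and NORM_INFINITY is the maximum row sum; NORM_2 is not supported
   for this matrix type.

     PetscReal      nrmf,nrm1,nrminf;
     PetscErrorCode ierr;

     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr);
*/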
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatTranspose_MPIAIJ"
1588 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1589 {
1590   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1591   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1592   PetscErrorCode ierr;
1593   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1594   PetscInt       cstart=A->cmap->rstart,ncol;
1595   Mat            B;
1596   MatScalar      *array;
1597 
1598   PetscFunctionBegin;
1599   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1600 
1601   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1602   ai = Aloc->i; aj = Aloc->j;
1603   bi = Bloc->i; bj = Bloc->j;
1604   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1605     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1606     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1607     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1608     for (i=0; i<ai[ma]; i++){
1609       d_nnz[aj[i]] ++;
1610       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1611     }
1612 
1613     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1614     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1615     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1616     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1617     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1618   } else {
1619     B = *matout;
1620   }
1621 
1622   /* copy over the A part */
1623   array = Aloc->a;
1624   row = A->rmap->rstart;
1625   for (i=0; i<ma; i++) {
1626     ncol = ai[i+1]-ai[i];
1627     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1628     row++; array += ncol; aj += ncol;
1629   }
1630   aj = Aloc->j;
1631   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1632 
1633   /* copy over the B part */
1634   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1635   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1636   array = Bloc->a;
1637   row = A->rmap->rstart;
1638   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1639   cols_tmp = cols;
1640   for (i=0; i<mb; i++) {
1641     ncol = bi[i+1]-bi[i];
1642     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1643     row++; array += ncol; cols_tmp += ncol;
1644   }
1645   ierr = PetscFree(cols);CHKERRQ(ierr);
1646 
1647   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1648   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1649   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1650     *matout = B;
1651   } else {
1652     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1653   }
1654   PetscFunctionReturn(0);
1655 }
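/*
   Illustrative usage sketch (not part of the original source): forming the transpose of a
   hypothetical assembled MPIAIJ matrix 'A'. The first call builds a new matrix; the second is
   the in-place variant, which (per the check above) is only legal for square matrices.

     Mat            At;
     PetscErrorCode ierr;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/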
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1659 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1660 {
1661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1662   Mat            a = aij->A,b = aij->B;
1663   PetscErrorCode ierr;
1664   PetscInt       s1,s2,s3;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1668   if (rr) {
1669     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1670     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1671     /* Overlap communication with computation. */
1672     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1673   }
1674   if (ll) {
1675     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1676     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1677     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1678   }
1679   /* scale  the diagonal block */
1680   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1681 
1682   if (rr) {
1683     /* Do a scatter end and then right scale the off-diagonal block */
1684     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1685     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1686   }
1687 
1688   PetscFunctionReturn(0);
1689 }
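/*
   Illustrative usage sketch (not part of the original source): computing L*A*R on a hypothetical
   assembled MPIAIJ matrix 'A', where 'l' conforms to the row layout and 'r' to the column layout
   of A. Either vector may be PETSC_NULL to scale on one side only.

     PetscErrorCode ierr;

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,PETSC_NULL,r);CHKERRQ(ierr);
*/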
1690 
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1693 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1694 {
1695   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1696   PetscErrorCode ierr;
1697 
1698   PetscFunctionBegin;
1699   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1700   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1705 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1706 {
1707   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1708   PetscErrorCode ierr;
1709 
1710   PetscFunctionBegin;
1711   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatEqual_MPIAIJ"
1717 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1718 {
1719   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1720   Mat            a,b,c,d;
1721   PetscTruth     flg;
1722   PetscErrorCode ierr;
1723 
1724   PetscFunctionBegin;
1725   a = matA->A; b = matA->B;
1726   c = matB->A; d = matB->B;
1727 
1728   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1729   if (flg) {
1730     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1731   }
1732   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatCopy_MPIAIJ"
1738 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1739 {
1740   PetscErrorCode ierr;
1741   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1742   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1743 
1744   PetscFunctionBegin;
1745   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1746   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1747     /* because of the column compression in the off-processor part of the matrix a->B,
1748        the number of columns in a->B and b->B may be different, hence we cannot call
1749        MatCopy() directly on the two parts. If need be, a copy more efficient than
1750        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1751        then copying the submatrices */
1752     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1753   } else {
1754     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1755     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1762 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1763 {
1764   PetscErrorCode ierr;
1765 
1766   PetscFunctionBegin;
1767   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 #include "petscblaslapack.h"
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatAXPY_MPIAIJ"
1774 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1775 {
1776   PetscErrorCode ierr;
1777   PetscInt       i;
1778   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1779   PetscBLASInt   bnz,one=1;
1780   Mat_SeqAIJ     *x,*y;
1781 
1782   PetscFunctionBegin;
1783   if (str == SAME_NONZERO_PATTERN) {
1784     PetscScalar alpha = a;
1785     x = (Mat_SeqAIJ *)xx->A->data;
1786     y = (Mat_SeqAIJ *)yy->A->data;
1787     bnz = PetscBLASIntCast(x->nz);
1788     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1789     x = (Mat_SeqAIJ *)xx->B->data;
1790     y = (Mat_SeqAIJ *)yy->B->data;
1791     bnz = PetscBLASIntCast(x->nz);
1792     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1793   } else if (str == SUBSET_NONZERO_PATTERN) {
1794     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1795 
1796     x = (Mat_SeqAIJ *)xx->B->data;
1797     y = (Mat_SeqAIJ *)yy->B->data;
1798     if (y->xtoy && y->XtoY != xx->B) {
1799       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1800       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1801     }
1802     if (!y->xtoy) { /* get xtoy */
1803       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1804       y->XtoY = xx->B;
1805       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1806     }
1807     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1808   } else {
1809     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1810   }
1811   PetscFunctionReturn(0);
1812 }
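/*
   Illustrative usage sketch (not part of the original source): computing Y = Y + a*X for two
   hypothetical assembled MPIAIJ matrices. SAME_NONZERO_PATTERN takes the BLAS axpy path above,
   SUBSET_NONZERO_PATTERN builds and caches the x-to-y index map, and any other structure falls
   back to MatAXPY_Basic().

     PetscScalar    a = 2.0;
     PetscErrorCode ierr;

     ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/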
1813 
1814 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1815 
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatConjugate_MPIAIJ"
1818 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1819 {
1820 #if defined(PETSC_USE_COMPLEX)
1821   PetscErrorCode ierr;
1822   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1823 
1824   PetscFunctionBegin;
1825   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1826   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1827 #else
1828   PetscFunctionBegin;
1829 #endif
1830   PetscFunctionReturn(0);
1831 }
1832 
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatRealPart_MPIAIJ"
1835 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1836 {
1837   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1838   PetscErrorCode ierr;
1839 
1840   PetscFunctionBegin;
1841   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1842   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1848 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1849 {
1850   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1851   PetscErrorCode ierr;
1852 
1853   PetscFunctionBegin;
1854   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1855   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 #ifdef PETSC_HAVE_PBGL
1860 
1861 #include <boost/parallel/mpi/bsp_process_group.hpp>
1862 #include <boost/graph/distributed/ilu_default_graph.hpp>
1863 #include <boost/graph/distributed/ilu_0_block.hpp>
1864 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1865 #include <boost/graph/distributed/petsc/interface.hpp>
1866 #include <boost/multi_array.hpp>
1867 #include <boost/parallel/distributed_property_map.hpp>
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1871 /*
1872   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1873 */
1874 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1875 {
1876   namespace petsc = boost::distributed::petsc;
1877 
1878   namespace graph_dist = boost::graph::distributed;
1879   using boost::graph::distributed::ilu_default::process_group_type;
1880   using boost::graph::ilu_permuted;
1881 
1882   PetscTruth      row_identity, col_identity;
1883   PetscContainer  c;
1884   PetscInt        m, n, M, N;
1885   PetscErrorCode  ierr;
1886 
1887   PetscFunctionBegin;
1888   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1889   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1890   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1891   if (!row_identity || !col_identity) {
1892     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1893   }
1894 
1895   process_group_type pg;
1896   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1897   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1898   lgraph_type&   level_graph = *lgraph_p;
1899   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1900 
1901   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1902   ilu_permuted(level_graph);
1903 
1904   /* put together the new matrix */
1905   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1906   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1907   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1908   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1909   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1910   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1911   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1912 
1913   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1914   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1915   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1921 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1922 {
1923   PetscFunctionBegin;
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatSolve_MPIAIJ"
1929 /*
1930   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1931 */
1932 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1933 {
1934   namespace graph_dist = boost::graph::distributed;
1935 
1936   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1937   lgraph_type*   lgraph_p;
1938   PetscContainer c;
1939   PetscErrorCode ierr;
1940 
1941   PetscFunctionBegin;
1942   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1943   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1944   ierr = VecCopy(b, x);CHKERRQ(ierr);
1945 
1946   PetscScalar* array_x;
1947   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1948   PetscInt sx;
1949   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1950 
1951   PetscScalar* array_b;
1952   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1953   PetscInt sb;
1954   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1955 
1956   lgraph_type&   level_graph = *lgraph_p;
1957   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1958 
1959   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1960   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1961                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1962 
1963   typedef boost::iterator_property_map<array_ref_type::iterator,
1964                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1965   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1966                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1967 
1968   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1969 
1970   PetscFunctionReturn(0);
1971 }
1972 #endif
1973 
1974 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1975   PetscInt       nzlocal,nsends,nrecvs;
1976   PetscMPIInt    *send_rank;
1977   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1978   PetscScalar    *sbuf_a,**rbuf_a;
1979   PetscErrorCode (*MatDestroy)(Mat);
1980 } Mat_Redundant;
1981 
1982 #undef __FUNCT__
1983 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1984 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1985 {
1986   PetscErrorCode       ierr;
1987   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1988   PetscInt             i;
1989 
1990   PetscFunctionBegin;
1991   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1992   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1993   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1994   for (i=0; i<redund->nrecvs; i++){
1995     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1996     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1997   }
1998   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1999   ierr = PetscFree(redund);CHKERRQ(ierr);
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatDestroy_MatRedundant"
2005 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2006 {
2007   PetscErrorCode  ierr;
2008   PetscContainer  container;
2009   Mat_Redundant   *redund=PETSC_NULL;
2010 
2011   PetscFunctionBegin;
2012   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2013   if (container) {
2014     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2015   } else {
2016     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2017   }
2018   A->ops->destroy = redund->MatDestroy;
2019   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2020   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2021   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2027 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2028 {
2029   PetscMPIInt    rank,size;
2030   MPI_Comm       comm=((PetscObject)mat)->comm;
2031   PetscErrorCode ierr;
2032   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2033   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2034   PetscInt       *rowrange=mat->rmap->range;
2035   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2036   Mat            A=aij->A,B=aij->B,C=*matredundant;
2037   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2038   PetscScalar    *sbuf_a;
2039   PetscInt       nzlocal=a->nz+b->nz;
2040   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2041   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2042   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2043   MatScalar      *aworkA,*aworkB;
2044   PetscScalar    *vals;
2045   PetscMPIInt    tag1,tag2,tag3,imdex;
2046   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2047                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2048   MPI_Status     recv_status,*send_status;
2049   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2050   PetscInt       **rbuf_j=PETSC_NULL;
2051   PetscScalar    **rbuf_a=PETSC_NULL;
2052   Mat_Redundant  *redund=PETSC_NULL;
2053   PetscContainer container;
2054 
2055   PetscFunctionBegin;
2056   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2057   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2058 
2059   if (reuse == MAT_REUSE_MATRIX) {
2060     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2061     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2062     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2063     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2064     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2065     if (container) {
2066       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2067     } else {
2068       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2069     }
2070     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2071 
2072     nsends    = redund->nsends;
2073     nrecvs    = redund->nrecvs;
2074     send_rank = redund->send_rank; recv_rank = send_rank + size;
2075     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2076     sbuf_j    = redund->sbuf_j;
2077     sbuf_a    = redund->sbuf_a;
2078     rbuf_j    = redund->rbuf_j;
2079     rbuf_a    = redund->rbuf_a;
2080   }
2081 
2082   if (reuse == MAT_INITIAL_MATRIX){
2083     PetscMPIInt  subrank,subsize;
2084     PetscInt     nleftover,np_subcomm;
2085     /* get the destination processors' id send_rank, nsends and nrecvs */
2086     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2087     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2088     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2089     recv_rank = send_rank + size;
2090     np_subcomm = size/nsubcomm;
2091     nleftover  = size - nsubcomm*np_subcomm;
2092     nsends = 0; nrecvs = 0;
2093     for (i=0; i<size; i++){ /* i=rank*/
2094       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2095         send_rank[nsends] = i; nsends++;
2096         recv_rank[nrecvs++] = i;
2097       }
2098     }
2099     if (rank >= size - nleftover){/* this proc is a leftover processor */
2100       i = size-nleftover-1;
2101       j = 0;
2102       while (j < nsubcomm - nleftover){
2103         send_rank[nsends++] = i;
2104         i--; j++;
2105       }
2106     }
2107 
2108     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2109       for (i=0; i<nleftover; i++){
2110         recv_rank[nrecvs++] = size-nleftover+i;
2111       }
2112     }
2113 
2114     /* allocate sbuf_j, sbuf_a */
2115     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2116     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2117     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2118   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2119 
2120   /* copy mat's local entries into the buffers */
2121   if (reuse == MAT_INITIAL_MATRIX){
2122     rownz_max = 0;
2123     rptr = sbuf_j;
2124     cols = sbuf_j + rend-rstart + 1;
2125     vals = sbuf_a;
2126     rptr[0] = 0;
2127     for (i=0; i<rend-rstart; i++){
2128       row = i + rstart;
2129       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2130       ncols  = nzA + nzB;
2131       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2132       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2133       /* load the column indices for this row into cols */
2134       lwrite = 0;
2135       for (l=0; l<nzB; l++) {
2136         if ((ctmp = bmap[cworkB[l]]) < cstart){
2137           vals[lwrite]   = aworkB[l];
2138           cols[lwrite++] = ctmp;
2139         }
2140       }
2141       for (l=0; l<nzA; l++){
2142         vals[lwrite]   = aworkA[l];
2143         cols[lwrite++] = cstart + cworkA[l];
2144       }
2145       for (l=0; l<nzB; l++) {
2146         if ((ctmp = bmap[cworkB[l]]) >= cend){
2147           vals[lwrite]   = aworkB[l];
2148           cols[lwrite++] = ctmp;
2149         }
2150       }
2151       vals += ncols;
2152       cols += ncols;
2153       rptr[i+1] = rptr[i] + ncols;
2154       if (rownz_max < ncols) rownz_max = ncols;
2155     }
2156     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2157   } else { /* only copy matrix values into sbuf_a */
2158     rptr = sbuf_j;
2159     vals = sbuf_a;
2160     rptr[0] = 0;
2161     for (i=0; i<rend-rstart; i++){
2162       row = i + rstart;
2163       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2164       ncols  = nzA + nzB;
2165       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2166       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2167       lwrite = 0;
2168       for (l=0; l<nzB; l++) {
2169         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2170       }
2171       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2172       for (l=0; l<nzB; l++) {
2173         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2174       }
2175       vals += ncols;
2176       rptr[i+1] = rptr[i] + ncols;
2177     }
2178   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2179 
2180   /* send nzlocal to others, and recv others' nzlocal */
2181   /*--------------------------------------------------*/
2182   if (reuse == MAT_INITIAL_MATRIX){
2183     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2184     s_waits2 = s_waits3 + nsends;
2185     s_waits1 = s_waits2 + nsends;
2186     r_waits1 = s_waits1 + nsends;
2187     r_waits2 = r_waits1 + nrecvs;
2188     r_waits3 = r_waits2 + nrecvs;
2189   } else {
2190     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2191     r_waits3 = s_waits3 + nsends;
2192   }
2193 
2194   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2195   if (reuse == MAT_INITIAL_MATRIX){
2196     /* get new tags to keep the communication clean */
2197     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2198     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2199     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2200     rbuf_nz = sbuf_nz + nsends;
2201 
2202     /* post receives of other's nzlocal */
2203     for (i=0; i<nrecvs; i++){
2204       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2205     }
2206     /* send nzlocal to others */
2207     for (i=0; i<nsends; i++){
2208       sbuf_nz[i] = nzlocal;
2209       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2210     }
2211     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2212     count = nrecvs;
2213     while (count) {
2214       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2215       recv_rank[imdex] = recv_status.MPI_SOURCE;
2216       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2217       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2218 
2219       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2220       rbuf_nz[imdex] += i + 2;
2221       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2222       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2223       count--;
2224     }
2225     /* wait on sends of nzlocal */
2226     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2227     /* send mat->i,j to others, and recv from others */
2228     /*------------------------------------------------*/
2229     for (i=0; i<nsends; i++){
2230       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2231       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2232     }
2233     /* wait on receives of mat->i,j */
2234     /*------------------------------*/
2235     count = nrecvs;
2236     while (count) {
2237       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2238       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2239       count--;
2240     }
2241     /* wait on sends of mat->i,j */
2242     /*---------------------------*/
2243     if (nsends) {
2244       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2245     }
2246   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2247 
2248   /* post receives, send and receive mat->a */
2249   /*----------------------------------------*/
2250   for (imdex=0; imdex<nrecvs; imdex++) {
2251     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2252   }
2253   for (i=0; i<nsends; i++){
2254     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2255   }
2256   count = nrecvs;
2257   while (count) {
2258     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2259     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2260     count--;
2261   }
2262   if (nsends) {
2263     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2264   }
2265 
2266   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2267 
2268   /* create redundant matrix */
2269   /*-------------------------*/
2270   if (reuse == MAT_INITIAL_MATRIX){
2271     /* compute rownz_max for preallocation */
2272     for (imdex=0; imdex<nrecvs; imdex++){
2273       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2274       rptr = rbuf_j[imdex];
2275       for (i=0; i<j; i++){
2276         ncols = rptr[i+1] - rptr[i];
2277         if (rownz_max < ncols) rownz_max = ncols;
2278       }
2279     }
2280 
2281     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2282     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2283     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2284     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2285     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2286   } else {
2287     C = *matredundant;
2288   }
2289 
2290   /* insert local matrix entries */
2291   rptr = sbuf_j;
2292   cols = sbuf_j + rend-rstart + 1;
2293   vals = sbuf_a;
2294   for (i=0; i<rend-rstart; i++){
2295     row   = i + rstart;
2296     ncols = rptr[i+1] - rptr[i];
2297     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2298     vals += ncols;
2299     cols += ncols;
2300   }
2301   /* insert received matrix entries */
2302   for (imdex=0; imdex<nrecvs; imdex++){
2303     rstart = rowrange[recv_rank[imdex]];
2304     rend   = rowrange[recv_rank[imdex]+1];
2305     rptr = rbuf_j[imdex];
2306     cols = rbuf_j[imdex] + rend-rstart + 1;
2307     vals = rbuf_a[imdex];
2308     for (i=0; i<rend-rstart; i++){
2309       row   = i + rstart;
2310       ncols = rptr[i+1] - rptr[i];
2311       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2312       vals += ncols;
2313       cols += ncols;
2314     }
2315   }
2316   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2317   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2318   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2319   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2320   if (reuse == MAT_INITIAL_MATRIX){
2321     PetscContainer container;
2322     *matredundant = C;
2323     /* create a supporting struct and attach it to C for reuse */
2324     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2325     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2326     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2327     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2328     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2329 
2330     redund->nzlocal = nzlocal;
2331     redund->nsends  = nsends;
2332     redund->nrecvs  = nrecvs;
2333     redund->send_rank = send_rank;
2334     redund->sbuf_nz = sbuf_nz;
2335     redund->sbuf_j  = sbuf_j;
2336     redund->sbuf_a  = sbuf_a;
2337     redund->rbuf_j  = rbuf_j;
2338     redund->rbuf_a  = rbuf_a;
2339 
2340     redund->MatDestroy = C->ops->destroy;
2341     C->ops->destroy    = MatDestroy_MatRedundant;
2342   }
2343   PetscFunctionReturn(0);
2344 }
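/*
   Illustrative usage sketch (not part of the original source): gathering the whole parallel
   matrix 'mat' redundantly onto each of 'nsubcomm' sub-communicators, for example so that each
   subgroup can factor it independently. Here 'subcomm' and 'mlocal_sub' are assumed to have been
   set up elsewhere; the second call reuses the gathered matrix after the values of 'mat' change.

     Mat            Cred;
     PetscErrorCode ierr;

     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);
*/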
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2348 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2349 {
2350   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2351   PetscErrorCode ierr;
2352   PetscInt       i,*idxb = 0;
2353   PetscScalar    *va,*vb;
2354   Vec            vtmp;
2355 
2356   PetscFunctionBegin;
2357   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2358   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2359   if (idx) {
2360     for (i=0; i<A->rmap->n; i++) {
2361       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2362     }
2363   }
2364 
2365   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2366   if (idx) {
2367     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2368   }
2369   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2370   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2371 
2372   for (i=0; i<A->rmap->n; i++){
2373     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2374       va[i] = vb[i];
2375       if (idx) idx[i] = a->garray[idxb[i]];
2376     }
2377   }
2378 
2379   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2380   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2381   if (idxb) {
2382     ierr = PetscFree(idxb);CHKERRQ(ierr);
2383   }
2384   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2385   PetscFunctionReturn(0);
2386 }
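/*
   Illustrative usage sketch (not part of the original source): obtaining, for each locally owned
   row of a hypothetical assembled MPIAIJ matrix 'A', the entry of largest absolute value and the
   global column where it occurs. 'v' must conform to the row layout of A; 'idx' may be
   PETSC_NULL when the column locations are not needed.

     Vec            v;
     PetscInt       m,*idx;
     PetscErrorCode ierr;

     ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
     ierr = PetscFree(idx);CHKERRQ(ierr);
     ierr = VecDestroy(v);CHKERRQ(ierr);
*/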
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2390 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2391 {
2392   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2393   PetscErrorCode ierr;
2394   PetscInt       i,*idxb = 0;
2395   PetscScalar    *va,*vb;
2396   Vec            vtmp;
2397 
2398   PetscFunctionBegin;
2399   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2400   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2401   if (idx) {
2402     for (i=0; i<A->rmap->n; i++) {
2403       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2404     }
2405   }
2406 
2407   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2408   if (idx) {
2409     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2410   }
2411   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2412   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2413 
2414   for (i=0; i<A->rmap->n; i++){
2415     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2416       va[i] = vb[i];
2417       if (idx) idx[i] = a->garray[idxb[i]];
2418     }
2419   }
2420 
2421   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2422   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2423   if (idxb) {
2424     ierr = PetscFree(idxb);CHKERRQ(ierr);
2425   }
2426   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2432 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2433 {
2434   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2435   PetscInt       n      = A->rmap->n;
2436   PetscInt       cstart = A->cmap->rstart;
2437   PetscInt      *cmap   = mat->garray;
2438   PetscInt      *diagIdx, *offdiagIdx;
2439   Vec            diagV, offdiagV;
2440   PetscScalar   *a, *diagA, *offdiagA;
2441   PetscInt       r;
2442   PetscErrorCode ierr;
2443 
2444   PetscFunctionBegin;
2445   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2446   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2447   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2448   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2449   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2450   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2451   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2452   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2453   for(r = 0; r < n; ++r) {
2454     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2455       a[r]   = diagA[r];
2456       idx[r] = cstart + diagIdx[r];
2457     } else {
2458       a[r]   = offdiagA[r];
2459       idx[r] = cmap[offdiagIdx[r]];
2460     }
2461   }
2462   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2463   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2464   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2465   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2466   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2467   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2473 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2474 {
2475   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2476   PetscInt       n      = A->rmap->n;
2477   PetscInt       cstart = A->cmap->rstart;
2478   PetscInt      *cmap   = mat->garray;
2479   PetscInt      *diagIdx, *offdiagIdx;
2480   Vec            diagV, offdiagV;
2481   PetscScalar   *a, *diagA, *offdiagA;
2482   PetscInt       r;
2483   PetscErrorCode ierr;
2484 
2485   PetscFunctionBegin;
2486   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2487   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2488   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2489   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2490   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2491   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2492   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2493   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2494   for(r = 0; r < n; ++r) {
2495     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2496       a[r]   = diagA[r];
2497       idx[r] = cstart + diagIdx[r];
2498     } else {
2499       a[r]   = offdiagA[r];
2500       idx[r] = cmap[offdiagIdx[r]];
2501     }
2502   }
2503   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2504   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2505   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2506   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2507   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2508   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 #undef __FUNCT__
2513 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2514 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2515 {
2516   PetscErrorCode ierr;
2517 
2518   PetscFunctionBegin;
2519   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 /* -------------------------------------------------------------------*/
2524 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2525        MatGetRow_MPIAIJ,
2526        MatRestoreRow_MPIAIJ,
2527        MatMult_MPIAIJ,
2528 /* 4*/ MatMultAdd_MPIAIJ,
2529        MatMultTranspose_MPIAIJ,
2530        MatMultTransposeAdd_MPIAIJ,
2531 #ifdef PETSC_HAVE_PBGL
2532        MatSolve_MPIAIJ,
2533 #else
2534        0,
2535 #endif
2536        0,
2537        0,
2538 /*10*/ 0,
2539        0,
2540        0,
2541        MatRelax_MPIAIJ,
2542        MatTranspose_MPIAIJ,
2543 /*15*/ MatGetInfo_MPIAIJ,
2544        MatEqual_MPIAIJ,
2545        MatGetDiagonal_MPIAIJ,
2546        MatDiagonalScale_MPIAIJ,
2547        MatNorm_MPIAIJ,
2548 /*20*/ MatAssemblyBegin_MPIAIJ,
2549        MatAssemblyEnd_MPIAIJ,
2550        MatSetOption_MPIAIJ,
2551        MatZeroEntries_MPIAIJ,
2552 /*24*/ MatZeroRows_MPIAIJ,
2553        0,
2554 #ifdef PETSC_HAVE_PBGL
2555        0,
2556 #else
2557        0,
2558 #endif
2559        0,
2560        0,
2561 /*29*/ MatSetUpPreallocation_MPIAIJ,
2562 #ifdef PETSC_HAVE_PBGL
2563        0,
2564 #else
2565        0,
2566 #endif
2567        0,
2568        0,
2569        0,
2570 /*34*/ MatDuplicate_MPIAIJ,
2571        0,
2572        0,
2573        0,
2574        0,
2575 /*39*/ MatAXPY_MPIAIJ,
2576        MatGetSubMatrices_MPIAIJ,
2577        MatIncreaseOverlap_MPIAIJ,
2578        MatGetValues_MPIAIJ,
2579        MatCopy_MPIAIJ,
2580 /*44*/ MatGetRowMax_MPIAIJ,
2581        MatScale_MPIAIJ,
2582        0,
2583        0,
2584        0,
2585 /*49*/ MatSetBlockSize_MPIAIJ,
2586        0,
2587        0,
2588        0,
2589        0,
2590 /*54*/ MatFDColoringCreate_MPIAIJ,
2591        0,
2592        MatSetUnfactored_MPIAIJ,
2593        MatPermute_MPIAIJ,
2594        0,
2595 /*59*/ MatGetSubMatrix_MPIAIJ,
2596        MatDestroy_MPIAIJ,
2597        MatView_MPIAIJ,
2598        0,
2599        0,
2600 /*64*/ 0,
2601        0,
2602        0,
2603        0,
2604        0,
2605 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2606        MatGetRowMinAbs_MPIAIJ,
2607        0,
2608        MatSetColoring_MPIAIJ,
2609 #if defined(PETSC_HAVE_ADIC)
2610        MatSetValuesAdic_MPIAIJ,
2611 #else
2612        0,
2613 #endif
2614        MatSetValuesAdifor_MPIAIJ,
2615 /*75*/ 0,
2616        0,
2617        0,
2618        0,
2619        0,
2620 /*80*/ 0,
2621        0,
2622        0,
2623 /*83*/ MatLoad_MPIAIJ,
2624        0,
2625        0,
2626        0,
2627        0,
2628        0,
2629 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2630        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2631        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2632        MatPtAP_Basic,
2633        MatPtAPSymbolic_MPIAIJ,
2634 /*94*/ MatPtAPNumeric_MPIAIJ,
2635        0,
2636        0,
2637        0,
2638        0,
2639 /*99*/ 0,
2640        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2641        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2642        MatConjugate_MPIAIJ,
2643        0,
2644 /*104*/MatSetValuesRow_MPIAIJ,
2645        MatRealPart_MPIAIJ,
2646        MatImaginaryPart_MPIAIJ,
2647        0,
2648        0,
2649 /*109*/0,
2650        MatGetRedundantMatrix_MPIAIJ,
2651        MatGetRowMin_MPIAIJ,
2652        0,
2653        0,
2654 /*114*/MatGetSeqNonzerostructure_MPIAIJ};
2655 
2656 /* ----------------------------------------------------------------------------------------*/
2657 
2658 EXTERN_C_BEGIN
2659 #undef __FUNCT__
2660 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2661 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2662 {
2663   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2664   PetscErrorCode ierr;
2665 
2666   PetscFunctionBegin;
2667   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2668   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2669   PetscFunctionReturn(0);
2670 }
2671 EXTERN_C_END
2672 
2673 EXTERN_C_BEGIN
2674 #undef __FUNCT__
2675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2676 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2677 {
2678   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2679   PetscErrorCode ierr;
2680 
2681   PetscFunctionBegin;
2682   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2683   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2684   PetscFunctionReturn(0);
2685 }
2686 EXTERN_C_END
2687 
2688 #include "petscpc.h"
2689 EXTERN_C_BEGIN
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2692 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2693 {
2694   Mat_MPIAIJ     *b;
2695   PetscErrorCode ierr;
2696   PetscInt       i;
2697 
2698   PetscFunctionBegin;
2699   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2700   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2701   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2702   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2703 
2704   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2705   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2706   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2707   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2708   if (d_nnz) {
2709     for (i=0; i<B->rmap->n; i++) {
2710       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2711     }
2712   }
2713   if (o_nnz) {
2714     for (i=0; i<B->rmap->n; i++) {
2715       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2716     }
2717   }
2718   b = (Mat_MPIAIJ*)B->data;
2719 
2720   if (!B->preallocated) {
2721     /* Explicitly create 2 MATSEQAIJ matrices. */
2722     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2723     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2724     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2725     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2726     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2727     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2728     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2729     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2730   }
2731 
2732   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2733   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2734   B->preallocated = PETSC_TRUE;
2735   PetscFunctionReturn(0);
2736 }
2737 EXTERN_C_END
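/*
   Illustrative usage sketch (not part of the original source): creating and preallocating an
   MPIAIJ matrix of hypothetical global size M x N. The constants used here (at most 5 diagonal
   and 2 off-diagonal nonzeros per row) are placeholders; when exact per-row counts are known,
   pass them via the d_nnz and o_nnz arrays instead.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/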
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2741 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2742 {
2743   Mat            mat;
2744   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   *newmat       = 0;
2749   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2750   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2751   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2752   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2753   a    = (Mat_MPIAIJ*)mat->data;
2754 
2755   mat->factor       = matin->factor;
2756   mat->rmap->bs      = matin->rmap->bs;
2757   mat->assembled    = PETSC_TRUE;
2758   mat->insertmode   = NOT_SET_VALUES;
2759   mat->preallocated = PETSC_TRUE;
2760 
2761   a->size           = oldmat->size;
2762   a->rank           = oldmat->rank;
2763   a->donotstash     = oldmat->donotstash;
2764   a->roworiented    = oldmat->roworiented;
2765   a->rowindices     = 0;
2766   a->rowvalues      = 0;
2767   a->getrowactive   = PETSC_FALSE;
2768 
2769   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2770   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2771 
2772   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2773   if (oldmat->colmap) {
2774 #if defined (PETSC_USE_CTABLE)
2775     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2776 #else
2777     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2778     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2779     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2780 #endif
2781   } else a->colmap = 0;
2782   if (oldmat->garray) {
2783     PetscInt len;
2784     len  = oldmat->B->cmap->n;
2785     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2786     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2787     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2788   } else a->garray = 0;
2789 
2790   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2791   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2792   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2793   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2794   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2795   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2796   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2797   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2798   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2799   *newmat = mat;
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 #include "petscsys.h"
2804 
2805 #undef __FUNCT__
2806 #define __FUNCT__ "MatLoad_MPIAIJ"
2807 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2808 {
2809   Mat            A;
2810   PetscScalar    *vals,*svals;
2811   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2812   MPI_Status     status;
2813   PetscErrorCode ierr;
2814   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2815   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2816   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2817   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2818   PetscInt       cend,cstart,n,*rowners;
2819   int            fd;
2820 
2821   PetscFunctionBegin;
2822   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2823   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2824   if (!rank) {
2825     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2826     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2827     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2828   }
2829 
2830   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2831   M = header[1]; N = header[2];
2832   /* determine ownership of all rows */
2833   m    = M/size + ((M % size) > rank);
2834   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2835   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2836 
2837   /* First process needs enough room for process with most rows */
2838   if (!rank) {
2839     mmax       = rowners[1];
2840     for (i=2; i<=size; i++) {
2841       mmax = PetscMax(mmax,rowners[i]);
2842     }
2843   } else mmax = m;
2844 
2845   rowners[0] = 0;
2846   for (i=2; i<=size; i++) {
2847     rowners[i] += rowners[i-1];
2848   }
2849   rstart = rowners[rank];
2850   rend   = rowners[rank+1];
2851 
2852   /* distribute row lengths to all processors */
2853   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2854   if (!rank) {
2855     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2856     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2857     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2858     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2859     for (j=0; j<m; j++) {
2860       procsnz[0] += ourlens[j];
2861     }
2862     for (i=1; i<size; i++) {
2863       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2864       /* calculate the number of nonzeros on each processor */
2865       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2866         procsnz[i] += rowlengths[j];
2867       }
2868       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2869       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2870     }
2871     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2872   } else {
2873     mpicnt = PetscMPIIntCast(m);
2874     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2875   }
2876 
2877   if (!rank) {
2878     /* determine max buffer needed and allocate it */
2879     maxnz = 0;
2880     for (i=0; i<size; i++) {
2881       maxnz = PetscMax(maxnz,procsnz[i]);
2882     }
2883     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2884 
2885     /* read in my part of the matrix column indices  */
2886     nz   = procsnz[0];
2887     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2888     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2889 
2890     /* read in everyone else's column indices and ship them off */
2891     for (i=1; i<size; i++) {
2892       nz     = procsnz[i];
2893       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2894       mpicnt = PetscMPIIntCast(nz);
2895       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2896     }
2897     ierr = PetscFree(cols);CHKERRQ(ierr);
2898   } else {
2899     /* determine buffer space needed for message */
2900     nz = 0;
2901     for (i=0; i<m; i++) {
2902       nz += ourlens[i];
2903     }
2904     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2905 
2906     /* receive message of column indices */
2907     mpicnt = PetscMPIIntCast(nz);
2908     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2909     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2910     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2911     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2912     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2913   }
2914 
2915   /* determine column ownership if matrix is not square */
2916   if (N != M) {
2917     n      = N/size + ((N % size) > rank);
2918     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2919     cstart = cend - n;
2920   } else {
2921     cstart = rstart;
2922     cend   = rend;
2923     n      = cend - cstart;
2924   }
2925 
2926   /* loop over local rows, determining number of off diagonal entries */
2927   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2928   jj = 0;
2929   for (i=0; i<m; i++) {
2930     for (j=0; j<ourlens[i]; j++) {
2931       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2932       jj++;
2933     }
2934   }
2935 
2936   /* create our matrix */
2937   for (i=0; i<m; i++) {
2938     ourlens[i] -= offlens[i];
2939   }
2940   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2941   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2942   ierr = MatSetType(A,type);CHKERRQ(ierr);
2943   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2944 
2945   for (i=0; i<m; i++) {
2946     ourlens[i] += offlens[i];
2947   }
2948 
2949   if (!rank) {
2950     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2951 
2952     /* read in my part of the matrix numerical values  */
2953     nz   = procsnz[0];
2954     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2955 
2956     /* insert into matrix */
2957     jj      = rstart;
2958     smycols = mycols;
2959     svals   = vals;
2960     for (i=0; i<m; i++) {
2961       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2962       smycols += ourlens[i];
2963       svals   += ourlens[i];
2964       jj++;
2965     }
2966 
2967     /* read in other processors and ship out */
2968     for (i=1; i<size; i++) {
2969       nz     = procsnz[i];
2970       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2971       mpicnt = PetscMPIIntCast(nz);
2972       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2973     }
2974     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2975   } else {
2976     /* receive numeric values */
2977     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2978 
2979     /* receive message of values*/
2980     mpicnt = PetscMPIIntCast(nz);
2981     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2982     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
2983     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2984     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2985     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2986 
2987     /* insert into matrix */
2988     jj      = rstart;
2989     smycols = mycols;
2990     svals   = vals;
2991     for (i=0; i<m; i++) {
2992       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2993       smycols += ourlens[i];
2994       svals   += ourlens[i];
2995       jj++;
2996     }
2997   }
2998   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2999   ierr = PetscFree(vals);CHKERRQ(ierr);
3000   ierr = PetscFree(mycols);CHKERRQ(ierr);
3001   ierr = PetscFree(rowners);CHKERRQ(ierr);
3002 
3003   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3004   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3005   *newmat = A;
3006   PetscFunctionReturn(0);
3007 }
3008 
3009 #undef __FUNCT__
3010 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3011 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3012 {
3013   PetscErrorCode ierr;
3014   IS             iscol_local;
3015   PetscInt       csize;
3016 
3017   PetscFunctionBegin;
3018   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3019   if (call == MAT_REUSE_MATRIX) {
3020     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3021     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3022   } else {
3023     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3024   }
3025   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3026   if (call == MAT_INITIAL_MATRIX) {
3027     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3028     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3029   }
3030   PetscFunctionReturn(0);
3031 }
3032 
3033 #undef __FUNCT__
3034 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3035 /*
3036     Not great since it makes two copies of the submatrix: first a SeqAIJ in local form,
3037   then the end result obtained by concatenating the local matrices.
3038   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3039 
3040   Note: This requires a sequential iscol with all indices.
3041 */
3042 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3043 {
3044   PetscErrorCode ierr;
3045   PetscMPIInt    rank,size;
3046   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3047   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3048   Mat            *local,M,Mreuse;
3049   MatScalar      *vwork,*aa;
3050   MPI_Comm       comm = ((PetscObject)mat)->comm;
3051   Mat_SeqAIJ     *aij;
3052 
3053 
3054   PetscFunctionBegin;
3055   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3056   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3057 
3058   if (call ==  MAT_REUSE_MATRIX) {
3059     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3060     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3061     local = &Mreuse;
3062     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3063   } else {
3064     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3065     Mreuse = *local;
3066     ierr   = PetscFree(local);CHKERRQ(ierr);
3067   }
3068 
3069   /*
3070       m - number of local rows
3071       n - number of columns (same on all processors)
3072       rstart - first row in new global matrix generated
3073   */
3074   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3075   if (call == MAT_INITIAL_MATRIX) {
3076     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3077     ii  = aij->i;
3078     jj  = aij->j;
3079 
3080     /*
3081         Determine the number of non-zeros in the diagonal and off-diagonal
3082         portions of the matrix in order to do correct preallocation
3083     */
3084 
3085     /* first get start and end of "diagonal" columns */
3086     if (csize == PETSC_DECIDE) {
3087       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3088       if (mglobal == n) { /* square matrix */
3089 	nlocal = m;
3090       } else {
3091         nlocal = n/size + ((n % size) > rank);
3092       }
3093     } else {
3094       nlocal = csize;
3095     }
3096     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3097     rstart = rend - nlocal;
3098     if (rank == size - 1 && rend != n) {
3099       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3100     }
3101 
3102     /* next, compute all the lengths */
3103     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3104     olens = dlens + m;
3105     for (i=0; i<m; i++) {
3106       jend = ii[i+1] - ii[i];
3107       olen = 0;
3108       dlen = 0;
3109       for (j=0; j<jend; j++) {
3110         if (*jj < rstart || *jj >= rend) olen++;
3111         else dlen++;
3112         jj++;
3113       }
3114       olens[i] = olen;
3115       dlens[i] = dlen;
3116     }
3117     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3118     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3119     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3120     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3121     ierr = PetscFree(dlens);CHKERRQ(ierr);
3122   } else {
3123     PetscInt ml,nl;
3124 
3125     M = *newmat;
3126     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3127     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3128     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3129     /*
3130          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3131        rather than the slower MatSetValues().
3132     */
3133     M->was_assembled = PETSC_TRUE;
3134     M->assembled     = PETSC_FALSE;
3135   }
3136   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3137   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3138   ii  = aij->i;
3139   jj  = aij->j;
3140   aa  = aij->a;
3141   for (i=0; i<m; i++) {
3142     row   = rstart + i;
3143     nz    = ii[i+1] - ii[i];
3144     cwork = jj;     jj += nz;
3145     vwork = aa;     aa += nz;
3146     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3147   }
3148 
3149   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3150   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3151   *newmat = M;
3152 
3153   /* save submatrix used in processor for next request */
3154   if (call ==  MAT_INITIAL_MATRIX) {
3155     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3156     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3157   }
3158 
3159   PetscFunctionReturn(0);
3160 }
3161 
3162 EXTERN_C_BEGIN
3163 #undef __FUNCT__
3164 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3165 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3166 {
3167   PetscInt       m,cstart, cend,j,nnz,i,d;
3168   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3169   const PetscInt *JJ;
3170   PetscScalar    *values;
3171   PetscErrorCode ierr;
3172 
3173   PetscFunctionBegin;
3174   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3175 
3176   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3177   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3178   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3179   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3180   m      = B->rmap->n;
3181   cstart = B->cmap->rstart;
3182   cend   = B->cmap->rend;
3183   rstart = B->rmap->rstart;
3184 
3185   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3186   o_nnz = d_nnz + m;
3187 
3188 #if defined(PETSC_USE_DEBUG)
3189   for (i=0; i<m; i++) {
3190     nnz     = Ii[i+1]- Ii[i];
3191     JJ      = J + Ii[i];
3192     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3193     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3194     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3195     for (j=1; j<nnz; j++) {
3196       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3197     }
3198   }
3199 #endif
3200 
3201   for (i=0; i<m; i++) {
3202     nnz     = Ii[i+1]- Ii[i];
3203     JJ      = J + Ii[i];
3204     nnz_max = PetscMax(nnz_max,nnz);
3205     for (j=0; j<nnz; j++) {
3206       if (*JJ >= cstart) break;
3207       JJ++;
3208     }
3209     d = 0;
3210     for (; j<nnz; j++) {
3211       if (*JJ++ >= cend) break;
3212       d++;
3213     }
3214     d_nnz[i] = d;
3215     o_nnz[i] = nnz - d;
3216   }
3217   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3218   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3219 
3220   if (v) values = (PetscScalar*)v;
3221   else {
3222     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3223     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3224   }
3225 
3226   for (i=0; i<m; i++) {
3227     ii   = i + rstart;
3228     nnz  = Ii[i+1]- Ii[i];
3229     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3230   }
3231   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3232   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3233 
3234   if (!v) {
3235     ierr = PetscFree(values);CHKERRQ(ierr);
3236   }
3237   PetscFunctionReturn(0);
3238 }
3239 EXTERN_C_END
3240 
3241 #undef __FUNCT__
3242 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3243 /*@
3244    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3245    (the default parallel PETSc format).
3246 
3247    Collective on MPI_Comm
3248 
3249    Input Parameters:
3250 +  B - the matrix
3251 .  i - the indices into j for the start of each local row (starts with zero)
3252 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3253 -  v - optional values in the matrix
3254 
3255    Level: developer
3256 
3257    Notes:
3258        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3259      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3260      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3261 
3262        The i and j indices are 0 based, and the i entries are offsets into the local j array.
3263 
3264        The format used for the sparse matrix input is equivalent to a
3265     row-major ordering, i.e. for the following matrix the input data expected is
3266     as shown:
3267 
3268         1 0 0
3269         2 0 3     P0
3270        -------
3271         4 5 6     P1
3272 
3273      Process0 [P0]: rows_owned=[0,1]
3274         i =  {0,1,3}  [size = nrow+1  = 2+1]
3275         j =  {0,0,2}  [size = nz = 3]
3276         v =  {1,2,3}  [size = nz = 3]
3277 
3278      Process1 [P1]: rows_owned=[2]
3279         i =  {0,3}    [size = nrow+1  = 1+1]
3280         j =  {0,1,2}  [size = nz = 3]
3281         v =  {4,5,6}  [size = nz = 3]
3282 
3283       The column indices for each row MUST be sorted.
3284 
3285 .keywords: matrix, aij, compressed row, sparse, parallel
3286 
3287 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3288           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3289 @*/
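/*
   A minimal usage sketch (illustrative only, not part of this source): each process passes
   the CSR description of its own rows. The names comm, mlocal, nlocal, ia, ja and va below
   are assumptions standing in for the caller's data; for the example above, P0 would pass
   ia={0,1,3}, ja={0,0,2}, va={1,2,3}.

     Mat B;
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,ia,ja,va);CHKERRQ(ierr);

   After this call the matrix is preallocated, filled with the values in va, and assembled.
*/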
3290 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3291 {
3292   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3293 
3294   PetscFunctionBegin;
3295   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3296   if (f) {
3297     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3298   }
3299   PetscFunctionReturn(0);
3300 }
3301 
3302 #undef __FUNCT__
3303 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3304 /*@C
3305    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3306    (the default parallel PETSc format).  For good matrix assembly performance
3307    the user should preallocate the matrix storage by setting the parameters
3308    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3309    performance can be increased by more than a factor of 50.
3310 
3311    Collective on MPI_Comm
3312 
3313    Input Parameters:
3314 +  A - the matrix
3315 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3316            (same value is used for all local rows)
3317 .  d_nnz - array containing the number of nonzeros in the various rows of the
3318            DIAGONAL portion of the local submatrix (possibly different for each row)
3319            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3320            The size of this array is equal to the number of local rows, i.e 'm'.
3321            You must leave room for the diagonal entry even if it is zero.
3322 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3323            submatrix (same value is used for all local rows).
3324 -  o_nnz - array containing the number of nonzeros in the various rows of the
3325            OFF-DIAGONAL portion of the local submatrix (possibly different for
3326            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3327            structure. The size of this array is equal to the number
3328            of local rows, i.e 'm'.
3329 
3330    If the *_nnz parameter is given then the *_nz parameter is ignored
3331 
3332    The AIJ format (also called the Yale sparse matrix format or
3333    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3334    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3335 
3336    The parallel matrix is partitioned such that the first m0 rows belong to
3337    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3338    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3339 
3340    The DIAGONAL portion of the local submatrix of a processor can be defined
3341    as the submatrix which is obtained by extracting the part corresponding
3342    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3343    first row that belongs to the processor, and r2 is the last row belonging
3344    to this processor. This is a square mxm matrix. The remaining portion
3345    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3346 
3347    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3348 
3349    You can call MatGetInfo() to get information on how effective the preallocation was;
3350    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3351    You can also run with the option -info and look for messages with the string
3352    malloc in them to see if additional memory allocation was needed.
3353 
3354    Example usage:
3355 
3356    Consider the following 8x8 matrix with 34 non-zero values, that is
3357    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3358    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3359    as follows:
3360 
3361 .vb
3362             1  2  0  |  0  3  0  |  0  4
3363     Proc0   0  5  6  |  7  0  0  |  8  0
3364             9  0 10  | 11  0  0  | 12  0
3365     -------------------------------------
3366            13  0 14  | 15 16 17  |  0  0
3367     Proc1   0 18  0  | 19 20 21  |  0  0
3368             0  0  0  | 22 23  0  | 24  0
3369     -------------------------------------
3370     Proc2  25 26 27  |  0  0 28  | 29  0
3371            30  0  0  | 31 32 33  |  0 34
3372 .ve
3373 
3374    This can be represented as a collection of submatrices as:
3375 
3376 .vb
3377       A B C
3378       D E F
3379       G H I
3380 .ve
3381 
3382    Where the submatrices A,B,C are owned by proc0, D,E,F are
3383    owned by proc1, G,H,I are owned by proc2.
3384 
3385    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3386    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3387    The 'M','N' parameters are 8,8, and have the same values on all procs.
3388 
3389    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3390    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3391    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3392    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3393    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3394    matrix, and [DF] as another SeqAIJ matrix.
3395 
3396    When d_nz, o_nz parameters are specified, d_nz storage elements are
3397    allocated for every row of the local diagonal submatrix, and o_nz
3398    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3399    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3400    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3401    In this case, the values of d_nz,o_nz are:
3402 .vb
3403      proc0 : d_nz = 2, o_nz = 2
3404      proc1 : d_nz = 3, o_nz = 2
3405      proc2 : d_nz = 1, o_nz = 4
3406 .ve
3407    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3408    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3409    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3410    34 values.
3411 
3412    When d_nnz, o_nnz parameters are specified, the storage is specified
3413    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3414    In the above case the values for d_nnz,o_nnz are:
3415 .vb
3416      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3417      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3418      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3419 .ve
3420    Here the space allocated is the sum of all the above values, i.e. 34, and
3421    hence the preallocation is perfect.
3422 
3423    Level: intermediate
3424 
3425 .keywords: matrix, aij, compressed row, sparse, parallel
3426 
3427 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3428           MPIAIJ, MatGetInfo()
3429 @*/
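/*
   A minimal usage sketch (illustrative only, not part of this source), following the 8x8
   example above as seen from proc0, which owns 3 rows. The communicator comm is assumed
   to be set up by the caller.

     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   followed by the usual MatSetValues(), MatAssemblyBegin() and MatAssemblyEnd() calls.
*/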
3430 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3431 {
3432   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3433 
3434   PetscFunctionBegin;
3435   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3436   if (f) {
3437     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3438   }
3439   PetscFunctionReturn(0);
3440 }
3441 
3442 #undef __FUNCT__
3443 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3444 /*@
3445      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3446          CSR format the local rows.
3447 
3448    Collective on MPI_Comm
3449 
3450    Input Parameters:
3451 +  comm - MPI communicator
3452 .  m - number of local rows (Cannot be PETSC_DECIDE)
3453 .  n - This value should be the same as the local size used in creating the
3454        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3455        calculated if N is given) For square matrices n is almost always m.
3456 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3457 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3458 .   i - row indices
3459 .   j - column indices
3460 -   a - matrix values
3461 
3462    Output Parameter:
3463 .   mat - the matrix
3464 
3465    Level: intermediate
3466 
3467    Notes:
3468        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3469      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3470      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3471 
3472        The i and j indices are 0 based, and the i entries are offsets into the local j array.
3473 
3474        The format used for the sparse matrix input is equivalent to a
3475     row-major ordering, i.e. for the following matrix the input data expected is
3476     as shown:
3477 
3478         1 0 0
3479         2 0 3     P0
3480        -------
3481         4 5 6     P1
3482 
3483      Process0 [P0]: rows_owned=[0,1]
3484         i =  {0,1,3}  [size = nrow+1  = 2+1]
3485         j =  {0,0,2}  [size = nz = 3]
3486         v =  {1,2,3}  [size = nz = 3]
3487 
3488      Process1 [P1]: rows_owned=[2]
3489         i =  {0,3}    [size = nrow+1  = 1+1]
3490         j =  {0,1,2}  [size = nz = 3]
3491         v =  {4,5,6}  [size = nz = 3]
3492 
3493 .keywords: matrix, aij, compressed row, sparse, parallel
3494 
3495 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3496           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3497 @*/
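/*
   A minimal usage sketch (illustrative only, not part of this source) for the two-process
   example above, written from the point of view of P0; P1 would make the analogous call
   with m=1 and its own arrays. The communicator comm is an assumption. The row offsets,
   column indices and values for P0's two rows are

     PetscInt    ia[] = {0,1,3};
     PetscInt    ja[] = {0,0,2};
     PetscScalar va[] = {1.0,2.0,3.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,ia,ja,va,&A);CHKERRQ(ierr);
*/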
3498 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3499 {
3500   PetscErrorCode ierr;
3501 
3502   PetscFunctionBegin;
3503   if (i[0]) {
3504     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3505   }
3506   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3507   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3508   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3509   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3510   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3511   PetscFunctionReturn(0);
3512 }
3513 
3514 #undef __FUNCT__
3515 #define __FUNCT__ "MatCreateMPIAIJ"
3516 /*@C
3517    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3518    (the default parallel PETSc format).  For good matrix assembly performance
3519    the user should preallocate the matrix storage by setting the parameters
3520    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3521    performance can be increased by more than a factor of 50.
3522 
3523    Collective on MPI_Comm
3524 
3525    Input Parameters:
3526 +  comm - MPI communicator
3527 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3528            This value should be the same as the local size used in creating the
3529            y vector for the matrix-vector product y = Ax.
3530 .  n - This value should be the same as the local size used in creating the
3531        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3532        calculated if N is given) For square matrices n is almost always m.
3533 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3534 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3535 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3536            (same value is used for all local rows)
3537 .  d_nnz - array containing the number of nonzeros in the various rows of the
3538            DIAGONAL portion of the local submatrix (possibly different for each row)
3539            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3540            The size of this array is equal to the number of local rows, i.e 'm'.
3541            You must leave room for the diagonal entry even if it is zero.
3542 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3543            submatrix (same value is used for all local rows).
3544 -  o_nnz - array containing the number of nonzeros in the various rows of the
3545            OFF-DIAGONAL portion of the local submatrix (possibly different for
3546            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3547            structure. The size of this array is equal to the number
3548            of local rows, i.e 'm'.
3549 
3550    Output Parameter:
3551 .  A - the matrix
3552 
3553    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3554    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3555    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3556 
3557    Notes:
3558    If the *_nnz parameter is given then the *_nz parameter is ignored
3559 
3560    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3561    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3562    storage requirements for this matrix.
3563 
3564    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3565    processor then it must be used on all processors that share the object for
3566    that argument.
3567 
3568    The user MUST specify either the local or global matrix dimensions
3569    (possibly both).
3570 
3571    The parallel matrix is partitioned across processors such that the
3572    first m0 rows belong to process 0, the next m1 rows belong to
3573    process 1, the next m2 rows belong to process 2, etc., where
3574    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3575    values corresponding to an [m x N] submatrix.
3576 
3577    The columns are logically partitioned with the n0 columns belonging
3578    to 0th partition, the next n1 columns belonging to the next
3579    partition, etc., where n0,n1,n2... are the input parameter 'n'.
3580 
3581    The DIAGONAL portion of the local submatrix on any given processor
3582    is the submatrix corresponding to the rows and columns m,n
3583    corresponding to the given processor, i.e. the diagonal matrix on
3584    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3585    etc. The remaining portion of the local submatrix [m x (N-n)]
3586    constitutes the OFF-DIAGONAL portion. The example below better
3587    illustrates this concept.
3588 
3589    For a square global matrix we define each processor's diagonal portion
3590    to be its local rows and the corresponding columns (a square submatrix);
3591    each processor's off-diagonal portion encompasses the remainder of the
3592    local matrix (a rectangular submatrix).
3593 
3594    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3595 
3596    When calling this routine with a single process communicator, a matrix of
3597    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3598    type of communicator, use the construction mechanism:
3599      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3600 
3601    By default, this format uses inodes (identical nodes) when possible.
3602    We search for consecutive rows with the same nonzero structure, thereby
3603    reusing matrix information to achieve increased efficiency.
3604 
3605    Options Database Keys:
3606 +  -mat_no_inode  - Do not use inodes
3607 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3608 -  -mat_aij_oneindex - Internally use indexing starting at 1
3609         rather than 0.  Note that when calling MatSetValues(),
3610         the user still MUST index entries starting at 0!
3611 
3612 
3613    Example usage:
3614 
3615    Consider the following 8x8 matrix with 34 non-zero values, that is
3616    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3617    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3618    as follows:
3619 
3620 .vb
3621             1  2  0  |  0  3  0  |  0  4
3622     Proc0   0  5  6  |  7  0  0  |  8  0
3623             9  0 10  | 11  0  0  | 12  0
3624     -------------------------------------
3625            13  0 14  | 15 16 17  |  0  0
3626     Proc1   0 18  0  | 19 20 21  |  0  0
3627             0  0  0  | 22 23  0  | 24  0
3628     -------------------------------------
3629     Proc2  25 26 27  |  0  0 28  | 29  0
3630            30  0  0  | 31 32 33  |  0 34
3631 .ve
3632 
3633    This can be represented as a collection of submatrices as:
3634 
3635 .vb
3636       A B C
3637       D E F
3638       G H I
3639 .ve
3640 
3641    Where the submatrices A,B,C are owned by proc0, D,E,F are
3642    owned by proc1, G,H,I are owned by proc2.
3643 
3644    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3645    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3646    The 'M','N' parameters are 8,8, and have the same values on all procs.
3647 
3648    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3649    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3650    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3651    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3652    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3653    matrix, and [DF] as another SeqAIJ matrix.
3654 
3655    When d_nz, o_nz parameters are specified, d_nz storage elements are
3656    allocated for every row of the local diagonal submatrix, and o_nz
3657    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3658    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3659    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3660    In this case, the values of d_nz,o_nz are:
3661 .vb
3662      proc0 : d_nz = 2, o_nz = 2
3663      proc1 : d_nz = 3, o_nz = 2
3664      proc2 : d_nz = 1, o_nz = 4
3665 .ve
3666    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3667    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3668    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3669    34 values.
3670 
3671    When d_nnz, o_nnz parameters are specified, the storage is specified
3672    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3673    In the above case the values for d_nnz,o_nnz are:
3674 .vb
3675      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3676      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3677      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3678 .ve
3679    Here the space allocated is the sum of all the above values, i.e. 34, and
3680    hence the preallocation is perfect.
3681 
3682    Level: intermediate
3683 
3684 .keywords: matrix, aij, compressed row, sparse, parallel
3685 
3686 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3687           MPIAIJ, MatCreateMPIAIJWithArrays()
3688 @*/
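/*
   A minimal usage sketch (illustrative only, not part of this source): the single-call form,
   equivalent to the MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() sequence recommended
   above. The sizes follow the 8x8 example, as seen from proc0; comm is an assumption.

     Mat A;
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);

   Note from the code below that on a single-process communicator a SEQAIJ matrix is returned
   and only d_nz/d_nnz are used for the preallocation.
*/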
3689 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3690 {
3691   PetscErrorCode ierr;
3692   PetscMPIInt    size;
3693 
3694   PetscFunctionBegin;
3695   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3696   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3697   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3698   if (size > 1) {
3699     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3700     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3701   } else {
3702     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3703     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3704   }
3705   PetscFunctionReturn(0);
3706 }
3707 
3708 #undef __FUNCT__
3709 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3710 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3711 {
3712   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3713 
3714   PetscFunctionBegin;
3715   *Ad     = a->A;
3716   *Ao     = a->B;
3717   *colmap = a->garray;
3718   PetscFunctionReturn(0);
3719 }
3720 
3721 #undef __FUNCT__
3722 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3723 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3724 {
3725   PetscErrorCode ierr;
3726   PetscInt       i;
3727   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3728 
3729   PetscFunctionBegin;
3730   if (coloring->ctype == IS_COLORING_GLOBAL) {
3731     ISColoringValue *allcolors,*colors;
3732     ISColoring      ocoloring;
3733 
3734     /* set coloring for diagonal portion */
3735     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3736 
3737     /* set coloring for off-diagonal portion */
3738     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3739     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3740     for (i=0; i<a->B->cmap->n; i++) {
3741       colors[i] = allcolors[a->garray[i]];
3742     }
3743     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3744     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3745     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3746     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3747   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3748     ISColoringValue *colors;
3749     PetscInt        *larray;
3750     ISColoring      ocoloring;
3751 
3752     /* set coloring for diagonal portion */
3753     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3754     for (i=0; i<a->A->cmap->n; i++) {
3755       larray[i] = i + A->cmap->rstart;
3756     }
3757     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3758     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3759     for (i=0; i<a->A->cmap->n; i++) {
3760       colors[i] = coloring->colors[larray[i]];
3761     }
3762     ierr = PetscFree(larray);CHKERRQ(ierr);
3763     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3764     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3765     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3766 
3767     /* set coloring for off-diagonal portion */
3768     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3769     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3770     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3771     for (i=0; i<a->B->cmap->n; i++) {
3772       colors[i] = coloring->colors[larray[i]];
3773     }
3774     ierr = PetscFree(larray);CHKERRQ(ierr);
3775     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3776     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3777     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3778   } else {
3779     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3780   }
3781 
3782   PetscFunctionReturn(0);
3783 }
3784 
3785 #if defined(PETSC_HAVE_ADIC)
3786 #undef __FUNCT__
3787 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3788 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3789 {
3790   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3791   PetscErrorCode ierr;
3792 
3793   PetscFunctionBegin;
3794   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3795   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3796   PetscFunctionReturn(0);
3797 }
3798 #endif
3799 
3800 #undef __FUNCT__
3801 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3802 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3803 {
3804   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3805   PetscErrorCode ierr;
3806 
3807   PetscFunctionBegin;
3808   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3809   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3810   PetscFunctionReturn(0);
3811 }
3812 
3813 #undef __FUNCT__
3814 #define __FUNCT__ "MatMerge"
3815 /*@
3816       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3817                  matrices from each processor
3818 
3819     Collective on MPI_Comm
3820 
3821    Input Parameters:
3822 +    comm - the communicator the parallel matrix will live on
3823 .    inmat - the input sequential matrix on each process
3824 .    n - number of local columns (or PETSC_DECIDE)
3825 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3826 
3827    Output Parameter:
3828 .    outmat - the parallel matrix generated
3829 
3830     Level: advanced
3831 
3832    Notes: The number of columns of the matrix on EACH processor MUST be the same. This routine calls MatDestroy() on inmat once its rows have been merged into outmat.
3833 
3834 @*/
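/*
   A minimal usage sketch (illustrative only, not part of this source): every process supplies
   a sequential matrix with the same number of columns and the rows are stacked in rank order.
   The name inmat stands for an assumed, already assembled SeqAIJ matrix on each process; as
   noted above, MatMerge() destroys it.

     Mat outmat;
     ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
*/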
3835 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3836 {
3837   PetscErrorCode ierr;
3838   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3839   PetscInt       *indx;
3840   PetscScalar    *values;
3841 
3842   PetscFunctionBegin;
3843   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3844   if (scall == MAT_INITIAL_MATRIX){
3845     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3846     if (n == PETSC_DECIDE){
3847       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3848     }
3849     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3850     rstart -= m;
3851 
3852     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3853     for (i=0;i<m;i++) {
3854       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3855       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3856       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3857     }
3858     /* This routine will ONLY return MPIAIJ type matrix */
3859     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3860     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3861     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3862     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3863     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3864 
3865   } else if (scall == MAT_REUSE_MATRIX){
3866     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3867   } else {
3868     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3869   }
3870 
3871   for (i=0;i<m;i++) {
3872     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3873     Ii    = i + rstart;
3874     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3875     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3876   }
3877   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3878   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3879   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3880 
3881   PetscFunctionReturn(0);
3882 }
3883 
3884 #undef __FUNCT__
3885 #define __FUNCT__ "MatFileSplit"
3886 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3887 {
3888   PetscErrorCode    ierr;
3889   PetscMPIInt       rank;
3890   PetscInt          m,N,i,rstart,nnz;
3891   size_t            len;
3892   const PetscInt    *indx;
3893   PetscViewer       out;
3894   char              *name;
3895   Mat               B;
3896   const PetscScalar *values;
3897 
3898   PetscFunctionBegin;
3899   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3900   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3901   /* Should this be the type of the diagonal block of A? */
3902   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3903   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3904   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3905   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3906   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3907   for (i=0;i<m;i++) {
3908     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3909     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3910     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3911   }
3912   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3913   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3914 
3915   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3916   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3917   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3918   sprintf(name,"%s.%d",outfile,rank);
3919   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3920   ierr = PetscFree(name);CHKERRQ(ierr);
3921   ierr = MatView(B,out);CHKERRQ(ierr);
3922   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3923   ierr = MatDestroy(B);CHKERRQ(ierr);
3924   PetscFunctionReturn(0);
3925 }
3926 
3927 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3928 #undef __FUNCT__
3929 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3930 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3931 {
3932   PetscErrorCode       ierr;
3933   Mat_Merge_SeqsToMPI  *merge;
3934   PetscContainer       container;
3935 
3936   PetscFunctionBegin;
3937   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3938   if (container) {
3939     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3940     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3941     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3942     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3943     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3944     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3945     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3946     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3947     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3948     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3949     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3950     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3951 
3952     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3953     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3954     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container was found */
3955   }
3956 
3957   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3958   PetscFunctionReturn(0);
3959 }
3960 
3961 #include "../src/mat/utils/freespace.h"
3962 #include "petscbt.h"
3963 
3964 #undef __FUNCT__
3965 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3966 /*@C
3967       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3968                  matrices from each processor
3969 
3970     Collective on MPI_Comm
3971 
3972    Input Parameters:
3973 +    comm - the communicator the parallel matrix will live on
3974 .    seqmat - the input sequential matrix on each process
3975 .    m - number of local rows (or PETSC_DECIDE)
3976 .    n - number of local columns (or PETSC_DECIDE)
3977 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3978 
3979    Output Parameter:
3980 .    mpimat - the parallel matrix generated
3981 
3982     Level: advanced
3983 
3984    Notes:
3985      The dimensions of the sequential matrix on each processor MUST be the same.
3986      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI" and will be
3987      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3988 @*/
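/*
   A minimal usage sketch (illustrative only, not part of this source): the merge is split into
   a symbolic phase, which creates the parallel matrix and its nonzero structure, and this
   numeric phase, which fills the values. The names comm, seqmat, m and n are assumptions
   standing in for the caller's data.

     Mat mpimat;
     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
*/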
3989 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3990 {
3991   PetscErrorCode       ierr;
3992   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3993   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3994   PetscMPIInt          size,rank,taga,*len_s;
3995   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
3996   PetscInt             proc,m;
3997   PetscInt             **buf_ri,**buf_rj;
3998   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3999   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4000   MPI_Request          *s_waits,*r_waits;
4001   MPI_Status           *status;
4002   MatScalar            *aa=a->a;
4003   MatScalar            **abuf_r,*ba_i;
4004   Mat_Merge_SeqsToMPI  *merge;
4005   PetscContainer       container;
4006 
4007   PetscFunctionBegin;
4008   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4009 
4010   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4011   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4012 
4013   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4014   if (container) {
4015     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4016   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat does not have an attached MatMergeSeqsToMPI container");
4017   bi     = merge->bi;
4018   bj     = merge->bj;
4019   buf_ri = merge->buf_ri;
4020   buf_rj = merge->buf_rj;
4021 
4022   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4023   owners = merge->rowmap.range;
4024   len_s  = merge->len_s;
4025 
4026   /* send and recv matrix values */
4027   /*-----------------------------*/
4028   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4029   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4030 
4031   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4032   for (proc=0,k=0; proc<size; proc++){
4033     if (!len_s[proc]) continue;
4034     i = owners[proc];
4035     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4036     k++;
4037   }
4038 
4039   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4040   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4041   ierr = PetscFree(status);CHKERRQ(ierr);
4042 
4043   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4044   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4045 
4046   /* insert mat values of mpimat */
4047   /*----------------------------*/
4048   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4049   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4050   nextrow = buf_ri_k + merge->nrecv;
4051   nextai  = nextrow + merge->nrecv;
4052 
4053   for (k=0; k<merge->nrecv; k++){
4054     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4055     nrows = *(buf_ri_k[k]);
4056     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4057     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4058   }
4059 
4060   /* set values of ba */
4061   m = merge->rowmap.n;
4062   for (i=0; i<m; i++) {
4063     arow = owners[rank] + i;
4064     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4065     bnzi = bi[i+1] - bi[i];
4066     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4067 
4068     /* add local non-zero vals of this proc's seqmat into ba */
4069     anzi = ai[arow+1] - ai[arow];
4070     aj   = a->j + ai[arow];
4071     aa   = a->a + ai[arow];
4072     nextaj = 0;
4073     for (j=0; nextaj<anzi; j++){
4074       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4075         ba_i[j] += aa[nextaj++];
4076       }
4077     }
4078 
4079     /* add received vals into ba */
4080     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4081       /* i-th row */
4082       if (i == *nextrow[k]) {
4083         anzi = *(nextai[k]+1) - *nextai[k];
4084         aj   = buf_rj[k] + *(nextai[k]);
4085         aa   = abuf_r[k] + *(nextai[k]);
4086         nextaj = 0;
4087         for (j=0; nextaj<anzi; j++){
4088           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4089             ba_i[j] += aa[nextaj++];
4090           }
4091         }
4092         nextrow[k]++; nextai[k]++;
4093       }
4094     }
4095     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4096   }
4097   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4098   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4099 
4100   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4101   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4102   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4103   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4104   PetscFunctionReturn(0);
4105 }
4106 
4107 #undef __FUNCT__
4108 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4109 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4110 {
4111   PetscErrorCode       ierr;
4112   Mat                  B_mpi;
4113   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4114   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4115   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4116   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4117   PetscInt             len,proc,*dnz,*onz;
4118   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4119   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4120   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4121   MPI_Status           *status;
4122   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4123   PetscBT              lnkbt;
4124   Mat_Merge_SeqsToMPI  *merge;
4125   PetscContainer       container;
4126 
4127   PetscFunctionBegin;
4128   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4129 
4130   /* make sure it is a PETSc comm */
4131   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4132   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4133   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4134 
4135   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4136   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4137 
4138   /* determine row ownership */
4139   /*---------------------------------------------------------*/
4140   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4141   merge->rowmap.n = m;
4142   merge->rowmap.N = M;
4143   merge->rowmap.bs = 1;
4144   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4145   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4146   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4147 
4148   m      = merge->rowmap.n;
4149   M      = merge->rowmap.N;
4150   owners = merge->rowmap.range;
4151 
4152   /* determine the number of messages to send, their lengths */
4153   /*---------------------------------------------------------*/
4154   len_s  = merge->len_s;
4155 
4156   len = 0;  /* length of buf_si[] */
4157   merge->nsend = 0;
4158   for (proc=0; proc<size; proc++){
4159     len_si[proc] = 0;
4160     if (proc == rank){
4161       len_s[proc] = 0;
4162     } else {
4163       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4164       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4165     }
4166     if (len_s[proc]) {
4167       merge->nsend++;
4168       nrows = 0;
4169       for (i=owners[proc]; i<owners[proc+1]; i++){
4170         if (ai[i+1] > ai[i]) nrows++;
4171       }
4172       len_si[proc] = 2*(nrows+1);
4173       len += len_si[proc];
4174     }
4175   }
4176 
4177   /* determine the number and length of messages to receive for ij-structure */
4178   /*-------------------------------------------------------------------------*/
4179   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4180   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4181 
4182   /* post the Irecv of j-structure */
4183   /*-------------------------------*/
4184   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4185   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4186 
4187   /* post the Isend of j-structure */
4188   /*--------------------------------*/
4189   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4190   sj_waits = si_waits + merge->nsend;
4191 
4192   for (proc=0, k=0; proc<size; proc++){
4193     if (!len_s[proc]) continue;
4194     i = owners[proc];
4195     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4196     k++;
4197   }
4198 
4199   /* receives and sends of j-structure are complete */
4200   /*------------------------------------------------*/
4201   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4202   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4203 
4204   /* send and recv i-structure */
4205   /*---------------------------*/
4206   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4207   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4208 
4209   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4210   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4211   for (proc=0,k=0; proc<size; proc++){
4212     if (!len_s[proc]) continue;
4213     /* form outgoing message for i-structure:
4214          buf_si[0]:                 nrows to be sent
4215                [1:nrows]:           row index (global)
4216                [nrows+1:2*nrows+1]: i-structure index
4217     */
4218     /*-------------------------------------------*/
4219     nrows = len_si[proc]/2 - 1;
4220     buf_si_i    = buf_si + nrows+1;
4221     buf_si[0]   = nrows;
4222     buf_si_i[0] = 0;
4223     nrows = 0;
4224     for (i=owners[proc]; i<owners[proc+1]; i++){
4225       anzi = ai[i+1] - ai[i];
4226       if (anzi) {
4227         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4228         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4229         nrows++;
4230       }
4231     }
4232     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4233     k++;
4234     buf_si += len_si[proc];
4235   }
4236 
4237   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4238   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4239 
4240   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4241   for (i=0; i<merge->nrecv; i++){
4242     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4243   }
4244 
4245   ierr = PetscFree(len_si);CHKERRQ(ierr);
4246   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4247   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4248   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4249   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4250   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4251   ierr = PetscFree(status);CHKERRQ(ierr);
4252 
4253   /* compute a local seq matrix in each processor */
4254   /*----------------------------------------------*/
4255   /* allocate bi array and free space for accumulating nonzero column info */
4256   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4257   bi[0] = 0;
4258 
4259   /* create and initialize a linked list */
4260   nlnk = N+1;
4261   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4262 
4263   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4264   len = 0;
4265   len  = ai[owners[rank+1]] - ai[owners[rank]];
4266   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4267   current_space = free_space;
4268 
4269   /* determine symbolic info for each local row */
4270   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4271   nextrow = buf_ri_k + merge->nrecv;
4272   nextai  = nextrow + merge->nrecv;
4273   for (k=0; k<merge->nrecv; k++){
4274     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4275     nrows = *buf_ri_k[k];
4276     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4277     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4278   }
4279 
4280   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4281   len = 0;
4282   for (i=0;i<m;i++) {
4283     bnzi   = 0;
4284     /* add local non-zero cols of this proc's seqmat into lnk */
4285     arow   = owners[rank] + i;
4286     anzi   = ai[arow+1] - ai[arow];
4287     aj     = a->j + ai[arow];
4288     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4289     bnzi += nlnk;
4290     /* add received col data into lnk */
4291     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4292       if (i == *nextrow[k]) { /* i-th row */
4293         anzi = *(nextai[k]+1) - *nextai[k];
4294         aj   = buf_rj[k] + *nextai[k];
4295         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4296         bnzi += nlnk;
4297         nextrow[k]++; nextai[k]++;
4298       }
4299     }
4300     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4301 
4302     /* if free space is not available, make more free space */
4303     if (current_space->local_remaining<bnzi) {
4304       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4305       nspacedouble++;
4306     }
4307     /* copy data into free space, then initialize lnk */
4308     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4309     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4310 
4311     current_space->array           += bnzi;
4312     current_space->local_used      += bnzi;
4313     current_space->local_remaining -= bnzi;
4314 
4315     bi[i+1] = bi[i] + bnzi;
4316   }
4317 
4318   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4319 
4320   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4321   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4322   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4323 
4324   /* create symbolic parallel matrix B_mpi */
4325   /*---------------------------------------*/
4326   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4327   if (n==PETSC_DECIDE) {
4328     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4329   } else {
4330     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4331   }
4332   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4333   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4334   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4335 
4336   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4337   B_mpi->assembled     = PETSC_FALSE;
4338   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4339   merge->bi            = bi;
4340   merge->bj            = bj;
4341   merge->buf_ri        = buf_ri;
4342   merge->buf_rj        = buf_rj;
4343   merge->coi           = PETSC_NULL;
4344   merge->coj           = PETSC_NULL;
4345   merge->owners_co     = PETSC_NULL;
4346 
4347   /* attach the supporting struct to B_mpi for reuse */
4348   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4349   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4350   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4351   *mpimat = B_mpi;
4352 
4353   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4354   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4355   PetscFunctionReturn(0);
4356 }
4357 
4358 #undef __FUNCT__
4359 #define __FUNCT__ "MatMerge_SeqsToMPI"
4360 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4361 {
4362   PetscErrorCode   ierr;
4363 
4364   PetscFunctionBegin;
4365   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4366   if (scall == MAT_INITIAL_MATRIX){
4367     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4368   }
4369   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4370   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4371   PetscFunctionReturn(0);
4372 }
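/*
   Usage sketch for MatMerge_SeqsToMPI() (illustrative only, not part of the library source).
   The variables comm, seqmat and ierr are assumed to be set up by the caller; each process
   supplies its own SeqAIJ matrix of the same global dimensions:

     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... later, after only the numerical values of seqmat have changed (same nonzero pattern) ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/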
4373 
4374 #undef __FUNCT__
4375 #define __FUNCT__ "MatGetLocalMat"
4376 /*@
4377      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A
4378 
4379     Not Collective
4380 
4381    Input Parameters:
4382 +    A - the matrix
4383 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4384 
4385    Output Parameter:
4386 .    A_loc - the local sequential matrix generated
4387 
4388     Level: developer
4389 
4390 @*/
4391 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4392 {
4393   PetscErrorCode  ierr;
4394   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4395   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4396   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4397   MatScalar       *aa=a->a,*ba=b->a,*cam;
4398   PetscScalar     *ca;
4399   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4400   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4401 
4402   PetscFunctionBegin;
4403   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4404   if (scall == MAT_INITIAL_MATRIX){
4405     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4406     ci[0] = 0;
4407     for (i=0; i<am; i++){
4408       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4409     }
4410     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4411     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4412     k = 0;
4413     for (i=0; i<am; i++) {
4414       ncols_o = bi[i+1] - bi[i];
4415       ncols_d = ai[i+1] - ai[i];
4416       /* off-diagonal portion of A */
4417       for (jo=0; jo<ncols_o; jo++) {
4418         col = cmap[*bj];
4419         if (col >= cstart) break;
4420         cj[k]   = col; bj++;
4421         ca[k++] = *ba++;
4422       }
4423       /* diagonal portion of A */
4424       for (j=0; j<ncols_d; j++) {
4425         cj[k]   = cstart + *aj++;
4426         ca[k++] = *aa++;
4427       }
4428       /* off-diagonal portion of A */
4429       for (j=jo; j<ncols_o; j++) {
4430         cj[k]   = cmap[*bj++];
4431         ca[k++] = *ba++;
4432       }
4433     }
4434     /* put together the new matrix */
4435     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4436     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4437     /* Since these are PETSc arrays, change flags to free them as necessary. */
4438     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4439     mat->free_a  = PETSC_TRUE;
4440     mat->free_ij = PETSC_TRUE;
4441     mat->nonew   = 0;
4442   } else if (scall == MAT_REUSE_MATRIX){
4443     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4444     ci = mat->i; cj = mat->j; cam = mat->a;
4445     for (i=0; i<am; i++) {
4446       /* off-diagonal portion of A */
4447       ncols_o = bi[i+1] - bi[i];
4448       for (jo=0; jo<ncols_o; jo++) {
4449         col = cmap[*bj];
4450         if (col >= cstart) break;
4451         *cam++ = *ba++; bj++;
4452       }
4453       /* diagonal portion of A */
4454       ncols_d = ai[i+1] - ai[i];
4455       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4456       /* off-diagonal portion of A */
4457       for (j=jo; j<ncols_o; j++) {
4458         *cam++ = *ba++; bj++;
4459       }
4460     }
4461   } else {
4462     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4463   }
4464 
4465   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4466   PetscFunctionReturn(0);
4467 }
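/*
   Usage sketch for MatGetLocalMat() (illustrative only; A and ierr are assumed to be
   declared by the caller, with A an assembled MPIAIJ matrix). The result holds the local
   rows of A with global column indices:

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... after the values of A change but not its nonzero structure ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/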
4468 
4469 #undef __FUNCT__
4470 #define __FUNCT__ "MatGetLocalMatCondensed"
4471 /*@C
4472      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows of A and its NON-ZERO columns
4473 
4474     Not Collective
4475 
4476    Input Parameters:
4477 +    A - the matrix
4478 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4479 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4480 
4481    Output Parameter:
4482 .    A_loc - the local sequential matrix generated
4483 
4484     Level: developer
4485 
4486 @*/
4487 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4488 {
4489   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4490   PetscErrorCode    ierr;
4491   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4492   IS                isrowa,iscola;
4493   Mat               *aloc;
4494 
4495   PetscFunctionBegin;
4496   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4497   if (!row){
4498     start = A->rmap->rstart; end = A->rmap->rend;
4499     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4500   } else {
4501     isrowa = *row;
4502   }
4503   if (!col){
4504     start = A->cmap->rstart;
4505     cmap  = a->garray;
4506     nzA   = a->A->cmap->n;
4507     nzB   = a->B->cmap->n;
4508     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4509     ncols = 0;
4510     for (i=0; i<nzB; i++) {
4511       if (cmap[i] < start) idx[ncols++] = cmap[i];
4512       else break;
4513     }
4514     imark = i;
4515     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4516     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4517     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4518     ierr = PetscFree(idx);CHKERRQ(ierr);
4519   } else {
4520     iscola = *col;
4521   }
4522   if (scall != MAT_INITIAL_MATRIX){
4523     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4524     aloc[0] = *A_loc;
4525   }
4526   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4527   *A_loc = aloc[0];
4528   ierr = PetscFree(aloc);CHKERRQ(ierr);
4529   if (!row){
4530     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4531   }
4532   if (!col){
4533     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4534   }
4535   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4536   PetscFunctionReturn(0);
4537 }
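/*
   Usage sketch for MatGetLocalMatCondensed() (illustrative only; A and ierr are assumed to
   be declared by the caller). Passing PETSC_NULL for row and col selects all local rows,
   all locally owned columns, and the off-process columns with nonzeros in the local rows:

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatGetLocalMatCondensed(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/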
4538 
4539 #undef __FUNCT__
4540 #define __FUNCT__ "MatGetBrowsOfAcols"
4541 /*@C
4542     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4543 
4544     Collective on Mat
4545 
4546    Input Parameters:
4547 +    A,B - the matrices in mpiaij format
4548 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4549 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4550 
4551    Output Parameter:
4552 +    rowb, colb - index sets of rows and columns of B to extract
4553 .    brstart - index of the first row of B_seq taken from B's local rows (the next B->rmap->n rows of B_seq are B's local rows)
4554 -    B_seq - the sequential matrix generated
4555 
4556     Level: developer
4557 
4558 @*/
4559 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4560 {
4561   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4562   PetscErrorCode    ierr;
4563   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4564   IS                isrowb,iscolb;
4565   Mat               *bseq;
4566 
4567   PetscFunctionBegin;
4568   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4569     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4570   }
4571   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4572 
4573   if (scall == MAT_INITIAL_MATRIX){
4574     start = A->cmap->rstart;
4575     cmap  = a->garray;
4576     nzA   = a->A->cmap->n;
4577     nzB   = a->B->cmap->n;
4578     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4579     ncols = 0;
4580     for (i=0; i<nzB; i++) {  /* row < local row index */
4581       if (cmap[i] < start) idx[ncols++] = cmap[i];
4582       else break;
4583     }
4584     imark = i;
4585     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4586     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4587     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4588     ierr = PetscFree(idx);CHKERRQ(ierr);
4589     *brstart = imark;
4590     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4591   } else {
4592     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4593     isrowb = *rowb; iscolb = *colb;
4594     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4595     bseq[0] = *B_seq;
4596   }
4597   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4598   *B_seq = bseq[0];
4599   ierr = PetscFree(bseq);CHKERRQ(ierr);
4600   if (!rowb){
4601     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4602   } else {
4603     *rowb = isrowb;
4604   }
4605   if (!colb){
4606     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4607   } else {
4608     *colb = iscolb;
4609   }
4610   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4611   PetscFunctionReturn(0);
4612 }
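/*
   Usage sketch for MatGetBrowsOfAcols() (illustrative only; A, B and ierr are assumed to be
   declared by the caller, with the matching local dimensions checked above). The index sets
   returned by the first call must be kept for the reuse call:

     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... after the values of B change but not its nonzero structure ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/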
4613 
4614 #undef __FUNCT__
4615 #define __FUNCT__ "MatGetBrowsOfAoCols"
4616 /*@C
4617     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4618     of the OFF-DIAGONAL portion of local A
4619 
4620     Collective on Mat
4621 
4622    Input Parameters:
4623 +    A,B - the matrices in mpiaij format
4624 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4625 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4626 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4627 
4628    Output Parameter:
4629 .    B_oth - the sequential matrix generated
4630 
4631     Level: developer
4632 
4633 @*/
4634 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4635 {
4636   VecScatter_MPI_General *gen_to,*gen_from;
4637   PetscErrorCode         ierr;
4638   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4639   Mat_SeqAIJ             *b_oth;
4640   VecScatter             ctx=a->Mvctx;
4641   MPI_Comm               comm=((PetscObject)ctx)->comm;
4642   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4643   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4644   PetscScalar            *rvalues,*svalues;
4645   MatScalar              *b_otha,*bufa,*bufA;
4646   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4647   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4648   MPI_Status             *sstatus,rstatus;
4649   PetscMPIInt            jj;
4650   PetscInt               *cols,sbs,rbs;
4651   PetscScalar            *vals;
4652 
4653   PetscFunctionBegin;
4654   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4655     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4656   }
4657   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4658   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4659 
4660   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4661   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4662   rvalues  = gen_from->values; /* used to hold the lengths of the rows to be received */
4663   svalues  = gen_to->values;   /* used to hold the lengths of the rows to be sent */
4664   nrecvs   = gen_from->n;
4665   nsends   = gen_to->n;
4666 
4667   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4668   srow     = gen_to->indices;   /* local row index to be sent */
4669   sstarts  = gen_to->starts;
4670   sprocs   = gen_to->procs;
4671   sstatus  = gen_to->sstatus;
4672   sbs      = gen_to->bs;
4673   rstarts  = gen_from->starts;
4674   rprocs   = gen_from->procs;
4675   rbs      = gen_from->bs;
4676 
4677   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4678   if (scall == MAT_INITIAL_MATRIX){
4679     /* i-array */
4680     /*---------*/
4681     /*  post receives */
4682     for (i=0; i<nrecvs; i++){
4683       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4684       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4685       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4686     }
4687 
4688     /* pack the outgoing message */
4689     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4690     rstartsj = sstartsj + nsends +1;
4691     sstartsj[0] = 0;  rstartsj[0] = 0;
4692     len = 0; /* total length of j or a array to be sent */
4693     k = 0;
4694     for (i=0; i<nsends; i++){
4695       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4696       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4697       for (j=0; j<nrows; j++) {
4698         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4699         for (l=0; l<sbs; l++){
4700           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4701           rowlen[j*sbs+l] = ncols;
4702           len += ncols;
4703           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4704         }
4705         k++;
4706       }
4707       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4708       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4709     }
4710     /* recvs and sends of i-array are completed */
4711     i = nrecvs;
4712     while (i--) {
4713       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4714     }
4715     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4716 
4717     /* allocate buffers for sending j and a arrays */
4718     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4719     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4720 
4721     /* create i-array of B_oth */
4722     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4723     b_othi[0] = 0;
4724     len = 0; /* total length of j or a array to be received */
4725     k = 0;
4726     for (i=0; i<nrecvs; i++){
4727       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4728       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4729       for (j=0; j<nrows; j++) {
4730         b_othi[k+1] = b_othi[k] + rowlen[j];
4731         len += rowlen[j]; k++;
4732       }
4733       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4734     }
4735 
4736     /* allocate space for j and a arrays of B_oth */
4737     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4738     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4739 
4740     /* j-array */
4741     /*---------*/
4742     /*  post receives of j-array */
4743     for (i=0; i<nrecvs; i++){
4744       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4745       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4746     }
4747 
4748     /* pack the outgoing message j-array */
4749     k = 0;
4750     for (i=0; i<nsends; i++){
4751       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4752       bufJ = bufj+sstartsj[i];
4753       for (j=0; j<nrows; j++) {
4754         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4755         for (ll=0; ll<sbs; ll++){
4756           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4757           for (l=0; l<ncols; l++){
4758             *bufJ++ = cols[l];
4759           }
4760           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4761         }
4762       }
4763       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4764     }
4765 
4766     /* recvs and sends of j-array are completed */
4767     i = nrecvs;
4768     while (i--) {
4769       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4770     }
4771     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4772   } else if (scall == MAT_REUSE_MATRIX){
4773     sstartsj = *startsj;
4774     rstartsj = sstartsj + nsends +1;
4775     bufa     = *bufa_ptr;
4776     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4777     b_otha   = b_oth->a;
4778   } else {
4779     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4780   }
4781 
4782   /* a-array */
4783   /*---------*/
4784   /*  post receives of a-array */
4785   for (i=0; i<nrecvs; i++){
4786     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4787     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4788   }
4789 
4790   /* pack the outgoing message a-array */
4791   k = 0;
4792   for (i=0; i<nsends; i++){
4793     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4794     bufA = bufa+sstartsj[i];
4795     for (j=0; j<nrows; j++) {
4796       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4797       for (ll=0; ll<sbs; ll++){
4798         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4799         for (l=0; l<ncols; l++){
4800           *bufA++ = vals[l];
4801         }
4802         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4803       }
4804     }
4805     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4806   }
4807   /* recvs and sends of a-array are completed */
4808   i = nrecvs;
4809   while (i--) {
4810     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4811   }
4812   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4813   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4814 
4815   if (scall == MAT_INITIAL_MATRIX){
4816     /* put together the new matrix */
4817     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4818 
4819     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4820     /* Since these are PETSc arrays, change flags to free them as necessary. */
4821     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4822     b_oth->free_a  = PETSC_TRUE;
4823     b_oth->free_ij = PETSC_TRUE;
4824     b_oth->nonew   = 0;
4825 
4826     ierr = PetscFree(bufj);CHKERRQ(ierr);
4827     if (!startsj || !bufa_ptr){
4828       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4829       ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not returned to the caller in this case */
4830     } else {
4831       *startsj  = sstartsj;
4832       *bufa_ptr = bufa;
4833     }
4834   }
4835   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4836   PetscFunctionReturn(0);
4837 }
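/*
   Usage sketch for MatGetBrowsOfAoCols() (illustrative only; A, B and ierr are assumed to
   be declared by the caller). The startsj and bufa buffers returned by the first call
   describe the communication pattern and are reused when only the values of B change:

     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa    = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree(startsj);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/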
4838 
4839 #undef __FUNCT__
4840 #define __FUNCT__ "MatGetCommunicationStructs"
4841 /*@C
4842   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4843 
4844   Not Collective
4845 
4846   Input Parameters:
4847 . A - The matrix in mpiaij format
4848 
4849   Output Parameter:
4850 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4851 . colmap - A map from global column index to local index into lvec
4852 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4853 
4854   Level: developer
4855 
4856 @*/
4857 #if defined (PETSC_USE_CTABLE)
4858 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4859 #else
4860 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4861 #endif
4862 {
4863   Mat_MPIAIJ *a;
4864 
4865   PetscFunctionBegin;
4866   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4867   PetscValidPointer(lvec, 2)
4868   PetscValidPointer(colmap, 3)
4869   PetscValidPointer(multScatter, 4)
4870   a = (Mat_MPIAIJ *) A->data;
4871   if (lvec) *lvec = a->lvec;
4872   if (colmap) *colmap = a->colmap;
4873   if (multScatter) *multScatter = a->Mvctx;
4874   PetscFunctionReturn(0);
4875 }
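/*
   Usage sketch for MatGetCommunicationStructs() (illustrative only; A and ierr are assumed
   to be declared by the caller, and a build without PETSC_USE_CTABLE is assumed for the
   type of colmap). The returned objects are owned by the matrix and must not be destroyed:

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/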
4876 
4877 EXTERN_C_BEGIN
4878 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4879 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4880 EXTERN_C_END
4881 
4882 #include "../src/mat/impls/dense/mpi/mpidense.h"
4883 
4884 #undef __FUNCT__
4885 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4886 /*
4887     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
4888 
4889                n                       p                          p
4890         (              )       (              )         (                  )
4891       m (      A       )  *  n (       B      )   =   m (         C        )
4892         (              )       (              )         (                  )
4893 
4894 */
4895 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4896 {
4897   PetscErrorCode     ierr;
4898   Mat                At,Bt,Ct;
4899 
4900   PetscFunctionBegin;
4901   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4902   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4903   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4904   ierr = MatDestroy(At);CHKERRQ(ierr);
4905   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4906   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4907   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4908   PetscFunctionReturn(0);
4909 }
4910 
4911 #undef __FUNCT__
4912 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4913 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4914 {
4915   PetscErrorCode ierr;
4916   PetscInt       m=A->rmap->n,n=B->cmap->n;
4917   Mat            Cmat;
4918 
4919   PetscFunctionBegin;
4920   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4921   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4922   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4923   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4924   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4925   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4926   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4927   *C   = Cmat;
4928   PetscFunctionReturn(0);
4929 }
4930 
4931 /* ----------------------------------------------------------------*/
4932 #undef __FUNCT__
4933 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4934 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4935 {
4936   PetscErrorCode ierr;
4937 
4938   PetscFunctionBegin;
4939   if (scall == MAT_INITIAL_MATRIX){
4940     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4941   }
4942   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4943   PetscFunctionReturn(0);
4944 }
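/*
   Usage sketch (illustrative only): these routines are reached through the generic
   MatMatMult() interface when A is MPIDENSE and B is MPIAIJ; A, B and ierr are assumed
   to be declared by the caller, and the fill argument of 1.0 is a placeholder:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,1.0,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/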
4945 
4946 EXTERN_C_BEGIN
4947 #if defined(PETSC_HAVE_MUMPS)
4948 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4949 #endif
4950 #if defined(PETSC_HAVE_PASTIX)
4951 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4952 #endif
4953 #if defined(PETSC_HAVE_SUPERLU_DIST)
4954 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4955 #endif
4956 #if defined(PETSC_HAVE_SPOOLES)
4957 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4958 #endif
4959 EXTERN_C_END
4960 
4961 /*MC
4962    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4963 
4964    Options Database Keys:
4965 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4966 
4967   Level: beginner
4968 
4969 .seealso: MatCreateMPIAIJ()
4970 M*/
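/*
   Usage sketch (illustrative only): creating an "mpiaij" matrix with the usual
   MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() sequence. comm, M, N and ierr
   are assumed to be defined by the caller, and the preallocation numbers are placeholders:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... MatSetValues() calls ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/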
4971 
4972 EXTERN_C_BEGIN
4973 #undef __FUNCT__
4974 #define __FUNCT__ "MatCreate_MPIAIJ"
4975 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4976 {
4977   Mat_MPIAIJ     *b;
4978   PetscErrorCode ierr;
4979   PetscMPIInt    size;
4980 
4981   PetscFunctionBegin;
4982   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4983 
4984   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4985   B->data         = (void*)b;
4986   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4987   B->rmap->bs      = 1;
4988   B->assembled    = PETSC_FALSE;
4989   B->mapping      = 0;
4990 
4991   B->insertmode      = NOT_SET_VALUES;
4992   b->size            = size;
4993   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4994 
4995   /* build cache for off array entries formed */
4996   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4997   b->donotstash  = PETSC_FALSE;
4998   b->colmap      = 0;
4999   b->garray      = 0;
5000   b->roworiented = PETSC_TRUE;
5001 
5002   /* stuff used for matrix vector multiply */
5003   b->lvec      = PETSC_NULL;
5004   b->Mvctx     = PETSC_NULL;
5005 
5006   /* stuff for MatGetRow() */
5007   b->rowindices   = 0;
5008   b->rowvalues    = 0;
5009   b->getrowactive = PETSC_FALSE;
5010 
5011 #if defined(PETSC_HAVE_SPOOLES)
5012   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
5013                                      "MatGetFactor_mpiaij_spooles",
5014                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5015 #endif
5016 #if defined(PETSC_HAVE_MUMPS)
5017   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
5018                                      "MatGetFactor_mpiaij_mumps",
5019                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5020 #endif
5021 #if defined(PETSC_HAVE_PASTIX)
5022   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
5023 					   "MatGetFactor_mpiaij_pastix",
5024 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5025 #endif
5026 #if defined(PETSC_HAVE_SUPERLU_DIST)
5027   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
5028                                      "MatGetFactor_mpiaij_superlu_dist",
5029                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5030 #endif
5031   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5032                                      "MatStoreValues_MPIAIJ",
5033                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5034   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5035                                      "MatRetrieveValues_MPIAIJ",
5036                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5037   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5038 				     "MatGetDiagonalBlock_MPIAIJ",
5039                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5040   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5041 				     "MatIsTranspose_MPIAIJ",
5042 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5043   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5044 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5045 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5046   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5047 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5048 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5049   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5050 				     "MatDiagonalScaleLocal_MPIAIJ",
5051 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5052   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5053                                      "MatConvert_MPIAIJ_MPICSRPERM",
5054                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5055   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5056                                      "MatConvert_MPIAIJ_MPICRL",
5057                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5058   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5059                                      "MatMatMult_MPIDense_MPIAIJ",
5060                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5061   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5062                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5063                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5064   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5065                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5066                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5067   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5068   PetscFunctionReturn(0);
5069 }
5070 EXTERN_C_END
5071 
5072 #undef __FUNCT__
5073 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5074 /*@
5075      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5076          and "off-diagonal" part of the matrix in CSR format.
5077 
5078    Collective on MPI_Comm
5079 
5080    Input Parameters:
5081 +  comm - MPI communicator
5082 .  m - number of local rows (Cannot be PETSC_DECIDE)
5083 .  n - This value should be the same as the local size used in creating the
5084        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5085        calculated if N is given). For square matrices n is almost always m.
5086 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5087 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5088 .   i - row indices for "diagonal" portion of matrix
5089 .   j - column indices
5090 .   a - matrix values
5091 .   oi - row indices for "off-diagonal" portion of matrix
5092 .   oj - column indices
5093 -   oa - matrix values
5094 
5095    Output Parameter:
5096 .   mat - the matrix
5097 
5098    Level: advanced
5099 
5100    Notes:
5101        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5102        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc, so they must remain valid for the lifetime of the matrix.
5103        The i and j indices are 0 based
5104 
5105        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5106 
5107 
5108 .keywords: matrix, aij, compressed row, sparse, parallel
5109 
5110 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5111           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5112 @*/
5113 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5114 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5115 {
5116   PetscErrorCode ierr;
5117   Mat_MPIAIJ     *maij;
5118 
5119   PetscFunctionBegin;
5120   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5121   if (i[0]) {
5122     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5123   }
5124   if (oi[0]) {
5125     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5126   }
5127   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5128   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5129   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5130   maij = (Mat_MPIAIJ*) (*mat)->data;
5131   maij->donotstash     = PETSC_TRUE;
5132   (*mat)->preallocated = PETSC_TRUE;
5133 
5134   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5135   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5136   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5137   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5138 
5139   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5140   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5141 
5142   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5143   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5144   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5145   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5146 
5147   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5148   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5149   PetscFunctionReturn(0);
5150 }
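/*
   Usage sketch for MatCreateMPIAIJWithSplitArrays() (illustrative only): the 2x2 matrix
   [2 -1; -1 2] distributed over exactly two processes, one row and one column per process.
   The j indices are local to the diagonal block, the oj indices are assumed to be in global
   numbering, and all six arrays must stay valid for the lifetime of the matrix since they
   are not copied. comm, rank and ierr are assumed to be defined by the caller:

     PetscInt    ii[2]  = {0,1}, jj[1]  = {0};
     PetscInt    oii[2] = {0,1}, ojj[1];
     PetscScalar aa[1]  = {2.0}, oaa[1] = {-1.0};
     Mat         A;
     ojj[0] = rank ? 0 : 1;       global column index of the single off-diagonal entry
     ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,2,2,ii,jj,aa,oii,ojj,oaa,&A);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/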
5151 
5152 /*
5153     Special version for direct calls from Fortran
5154 */
5155 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5156 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5157 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5158 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5159 #endif
5160 
5161 /* Change these macros so can be used in void function */
5162 #undef CHKERRQ
5163 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5164 #undef SETERRQ2
5165 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5166 #undef SETERRQ
5167 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5168 
5169 EXTERN_C_BEGIN
5170 #undef __FUNCT__
5171 #define __FUNCT__ "matsetvaluesmpiaij_"
5172 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5173 {
5174   Mat             mat = *mmat;
5175   PetscInt        m = *mm, n = *mn;
5176   InsertMode      addv = *maddv;
5177   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5178   PetscScalar     value;
5179   PetscErrorCode  ierr;
5180 
5181   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5182   if (mat->insertmode == NOT_SET_VALUES) {
5183     mat->insertmode = addv;
5184   }
5185 #if defined(PETSC_USE_DEBUG)
5186   else if (mat->insertmode != addv) {
5187     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5188   }
5189 #endif
5190   {
5191   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5192   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5193   PetscTruth      roworiented = aij->roworiented;
5194 
5195   /* Some Variables required in the macro */
5196   Mat             A = aij->A;
5197   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5198   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5199   MatScalar       *aa = a->a;
5200   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5201   Mat             B = aij->B;
5202   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5203   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5204   MatScalar       *ba = b->a;
5205 
5206   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5207   PetscInt        nonew = a->nonew;
5208   MatScalar       *ap1,*ap2;
5209 
5210   PetscFunctionBegin;
5211   for (i=0; i<m; i++) {
5212     if (im[i] < 0) continue;
5213 #if defined(PETSC_USE_DEBUG)
5214     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5215 #endif
5216     if (im[i] >= rstart && im[i] < rend) {
5217       row      = im[i] - rstart;
5218       lastcol1 = -1;
5219       rp1      = aj + ai[row];
5220       ap1      = aa + ai[row];
5221       rmax1    = aimax[row];
5222       nrow1    = ailen[row];
5223       low1     = 0;
5224       high1    = nrow1;
5225       lastcol2 = -1;
5226       rp2      = bj + bi[row];
5227       ap2      = ba + bi[row];
5228       rmax2    = bimax[row];
5229       nrow2    = bilen[row];
5230       low2     = 0;
5231       high2    = nrow2;
5232 
5233       for (j=0; j<n; j++) {
5234         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5235         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5236         if (in[j] >= cstart && in[j] < cend){
5237           col = in[j] - cstart;
5238           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5239         } else if (in[j] < 0) continue;
5240 #if defined(PETSC_USE_DEBUG)
5241         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5242 #endif
5243         else {
5244           if (mat->was_assembled) {
5245             if (!aij->colmap) {
5246               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5247             }
5248 #if defined (PETSC_USE_CTABLE)
5249             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5250 	    col--;
5251 #else
5252             col = aij->colmap[in[j]] - 1;
5253 #endif
5254             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5255               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5256               col =  in[j];
5257               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5258               B = aij->B;
5259               b = (Mat_SeqAIJ*)B->data;
5260               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5261               rp2      = bj + bi[row];
5262               ap2      = ba + bi[row];
5263               rmax2    = bimax[row];
5264               nrow2    = bilen[row];
5265               low2     = 0;
5266               high2    = nrow2;
5267               bm       = aij->B->rmap->n;
5268               ba = b->a;
5269             }
5270           } else col = in[j];
5271           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5272         }
5273       }
5274     } else {
5275       if (!aij->donotstash) {
5276         if (roworiented) {
5277           if (ignorezeroentries && v[i*n] == 0.0) continue;
5278           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5279         } else {
5280           if (ignorezeroentries && v[i] == 0.0) continue;
5281           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5282         }
5283       }
5284     }
5285   }}
5286   PetscFunctionReturnVoid();
5287 }
5288 EXTERN_C_END
5289 
5290