/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b807a863be2d35844e5a9048d0d2af900430b9bb) */
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix (significant on process 0) across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(); horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices.
13 */
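/*
    A minimal usage sketch (an illustration only, not part of the library): gmat is a fully
    assembled SeqAIJ matrix significant on process 0, m is the number of rows this process
    is to own, and dist is a hypothetical name for the resulting parallel matrix.

      Mat dist;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/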
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscBool      aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the diagonal and off-diagonal nonzero counts */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the diagonal and off-diagonal nonzero counts */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
180 at a slightly higher hash-table lookup cost; without it it is not scalable
181 (each processor holds an order-N integer array) but access is fast.
182 */
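/*
   Lookup sketch (mirrors the uses in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()
   below; gcol and lcol are hypothetical names): translate a global column gcol into the
   local off-diagonal column, with lcol < 0 meaning "not present on this process".

#if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
#else
       lcol = aij->colmap[gcol] - 1;
#endif
*/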
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
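/*
   The two macros below insert (or add) a single value into the local diagonal block A
   or the off-diagonal block B of a row held in CSR form: a short bisection (while
   high-low > 5) followed by a linear scan locates the column; if the column is absent
   and new nonzeros are allowed, the row is grown via MatSeqXAIJReallocateAIJ() and the
   later entries in the row are shifted up one slot. They assume the surrounding
   MatSetValues_MPIAIJ() has set up rp1/ap1/nrow1/... (A part) and rp2/ap2/nrow2/... (B part).
*/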
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
272 
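/*
   MatSetValuesRow_MPIAIJ() below overwrites one locally owned row with the values in
   v[], which must be ordered by increasing global column: first the off-diagonal
   entries to the left of the diagonal block, then the diagonal-block entries, then the
   remaining off-diagonal entries. Values are copied straight into the A and B blocks,
   so the nonzero pattern of the row must already exist.
*/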
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
284 
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
300 
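/*
   MatSetValues_MPIAIJ() below routes each entry: locally owned rows go directly into
   the diagonal block (aij->A) for columns in [cstart,cend) or into the off-diagonal
   block (aij->B) otherwise, using the macros above; entries for rows owned by other
   processes are placed in the stash and communicated during MatAssemblyBegin/End().
*/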
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscBool      roworiented = aij->roworiented;
311 
312   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscBool      ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
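/*
   MatAssemblyBegin_MPIAIJ() below checks that all processes used a consistent insert
   mode and starts scattering the stashed off-process entries to their owning
   processes; MatAssemblyEnd_MPIAIJ() receives them and applies them with
   MatSetValues_MPIAIJ() before assembling the local A and B blocks.
*/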
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash || mat->nooffprocentries) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   mat->insertmode = addv; /* in case this processor had no cache */
470 
471   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
472   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
473   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
479 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
483   PetscErrorCode ierr;
484   PetscMPIInt    n;
485   PetscInt       i,j,rstart,ncols,flg;
486   PetscInt       *row,*col;
487   PetscBool      other_disassembled;
488   PetscScalar    *val;
489   InsertMode     addv = mat->insertmode;
490 
491   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
492   PetscFunctionBegin;
493   if (!aij->donotstash && !mat->nooffprocentries) {
494     while (1) {
495       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
496       if (!flg) break;
497 
498       for (i=0; i<n;) {
499         /* Now identify the consecutive vals belonging to the same row */
500         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
501         if (j < n) ncols = j-i;
502         else       ncols = n-i;
503         /* Now assemble all these values with a single function call */
504         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
505         i = j;
506       }
507     }
508     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
509   }
510   a->compressedrow.use     = PETSC_FALSE;
511   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
512   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
513 
514   /* determine if any processor has disassembled; if so we must
515      also disassemble ourselves, in order that we may reassemble. */
516   /*
517      if nonzero structure of submatrix B cannot change then we know that
518      no processor disassembled thus we can skip this stuff
519   */
520   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
521     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
522     if (mat->was_assembled && !other_disassembled) {
523       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
524     }
525   }
526   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
527     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
528   }
529   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
530   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
531   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
532   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
533 
534   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
535   aij->rowvalues = 0;
536 
537   /* used by MatAXPY() */
538   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
539   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
540 
541   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
542   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
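/*
   MatZeroRows_MPIAIJ() below first ships each requested global row index to the
   process that owns it (rows may be specified from any process), then zeroes those
   rows in both the diagonal (l->A) and off-diagonal (l->B) blocks, optionally placing
   diag on the diagonal and, when x and b are provided, setting b = diag*x in the
   zeroed rows.
*/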
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
562 {
563   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode    ierr;
565   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt          i,*owners = A->rmap->range;
567   PetscInt          *nprocs,j,idx,nsends,row;
568   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt          *rvalues,count,base,slen,*source;
570   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm          comm = ((PetscObject)A)->comm;
572   MPI_Request       *send_waits,*recv_waits;
573   MPI_Status        recv_status,*send_status;
574   const PetscScalar *xx;
575   PetscScalar       *bb;
576 #if defined(PETSC_DEBUG)
577   PetscBool      found = PETSC_FALSE;
578 #endif
579 
580   PetscFunctionBegin;
581   /*  first count number of contributors to each processor */
582   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
583   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
584   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
585   j = 0;
586   for (i=0; i<N; i++) {
587     if (lastidx > (idx = rows[i])) j = 0;
588     lastidx = idx;
589     for (; j<size; j++) {
590       if (idx >= owners[j] && idx < owners[j+1]) {
591         nprocs[2*j]++;
592         nprocs[2*j+1] = 1;
593         owner[i] = j;
594 #if defined(PETSC_DEBUG)
595         found = PETSC_TRUE;
596 #endif
597         break;
598       }
599     }
600 #if defined(PETSC_DEBUG)
601     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
602     found = PETSC_FALSE;
603 #endif
604   }
605   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
606 
607   if (A->nooffproczerorows) {
608     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
609     nrecvs = nsends;
610     nmax   = N;
611   } else {
612     /* inform other processors of number of messages and max length*/
613     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
614   }
615 
616   /* post receives:   */
617   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
618   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
619   for (i=0; i<nrecvs; i++) {
620     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
621   }
622 
623   /* do sends:
624       1) starts[i] gives the starting index in svalues for stuff going to
625          the ith processor
626   */
627   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
628   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
629   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
630   starts[0] = 0;
631   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
632   for (i=0; i<N; i++) {
633     svalues[starts[owner[i]]++] = rows[i];
634   }
635 
636   starts[0] = 0;
637   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
638   count = 0;
639   for (i=0; i<size; i++) {
640     if (nprocs[2*i+1]) {
641       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
642     }
643   }
644   ierr = PetscFree(starts);CHKERRQ(ierr);
645 
646   base = owners[rank];
647 
648   /*  wait on receives */
649   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
650   count  = nrecvs; slen = 0;
651   while (count) {
652     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
653     /* unpack receives into our local space */
654     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
655     source[imdex]  = recv_status.MPI_SOURCE;
656     lens[imdex]    = n;
657     slen          += n;
658     count--;
659   }
660   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
661 
662   /* move the data into the send scatter */
663   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
664   count = 0;
665   for (i=0; i<nrecvs; i++) {
666     values = rvalues + i*nmax;
667     for (j=0; j<lens[i]; j++) {
668       lrows[count++] = values[j] - base;
669     }
670   }
671   ierr = PetscFree(rvalues);CHKERRQ(ierr);
672   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
673   ierr = PetscFree(owner);CHKERRQ(ierr);
674   ierr = PetscFree(nprocs);CHKERRQ(ierr);
675 
676   /* fix right hand side if needed */
677   if (x && b) {
678     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
679     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
680     for (i=0; i<N; i++) {
681       bb[lrows[i]] = diag*xx[lrows[i]];
682     }
683     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
684     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
685   }
686   /*
687         Zero the required rows. If the "diagonal block" of the matrix
688      is square and the user wishes to set the diagonal we use separate
689      code so that MatSetValues() is not called for each diagonal entry, which
690      would allocate new memory, triggering many mallocs and slowing things down.
691 
692   */
693   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
694   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
695   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
696     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
697   } else if (diag != 0.0) {
698     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
699     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
700       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
701 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
702     }
703     for (i = 0; i < slen; i++) {
704       row  = lrows[i] + rstart;
705       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
706     }
707     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709   } else {
710     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
711   }
712   ierr = PetscFree(lrows);CHKERRQ(ierr);
713 
714   /* wait on sends */
715   if (nsends) {
716     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
717     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
718     ierr = PetscFree(send_status);CHKERRQ(ierr);
719   }
720   ierr = PetscFree(send_waits);CHKERRQ(ierr);
721   ierr = PetscFree(svalues);CHKERRQ(ierr);
722 
723   PetscFunctionReturn(0);
724 }
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
728 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
729 {
730   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
731   PetscErrorCode    ierr;
732   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
733   PetscInt          i,*owners = A->rmap->range;
734   PetscInt          *nprocs,j,idx,nsends,row;
735   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
736   PetscInt          *rvalues,count,base,slen,*source;
737   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
738   MPI_Comm          comm = ((PetscObject)A)->comm;
739   MPI_Request       *send_waits,*recv_waits;
740   MPI_Status        recv_status,*send_status;
741   const PetscScalar *xx;
742   PetscScalar       *bb;
743 #if defined(PETSC_DEBUG)
744   PetscBool      found = PETSC_FALSE;
745 #endif
746 
747   PetscFunctionBegin;
748   /*  first count number of contributors to each processor */
749   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
750   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
751   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
752   j = 0;
753   for (i=0; i<N; i++) {
754     if (lastidx > (idx = rows[i])) j = 0;
755     lastidx = idx;
756     for (; j<size; j++) {
757       if (idx >= owners[j] && idx < owners[j+1]) {
758         nprocs[2*j]++;
759         nprocs[2*j+1] = 1;
760         owner[i] = j;
761 #if defined(PETSC_DEBUG)
762         found = PETSC_TRUE;
763 #endif
764         break;
765       }
766     }
767 #if defined(PETSC_DEBUG)
768     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
769     found = PETSC_FALSE;
770 #endif
771   }
772   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
773 
774   if (A->nooffproczerorows) {
775     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
776     nrecvs = nsends;
777     nmax   = N;
778   } else {
779     /* inform other processors of number of messages and max length*/
780     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
781   }
782 
783   /* post receives:   */
784   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
785   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
786   for (i=0; i<nrecvs; i++) {
787     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
788   }
789 
790   /* do sends:
791       1) starts[i] gives the starting index in svalues for stuff going to
792          the ith processor
793   */
794   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
795   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
796   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
797   starts[0] = 0;
798   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
799   for (i=0; i<N; i++) {
800     svalues[starts[owner[i]]++] = rows[i];
801   }
802 
803   starts[0] = 0;
804   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
805   count = 0;
806   for (i=0; i<size; i++) {
807     if (nprocs[2*i+1]) {
808       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
809     }
810   }
811   ierr = PetscFree(starts);CHKERRQ(ierr);
812 
813   base = owners[rank];
814 
815   /*  wait on receives */
816   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
817   count  = nrecvs; slen = 0;
818   while (count) {
819     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
820     /* unpack receives into our local space */
821     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
822     source[imdex]  = recv_status.MPI_SOURCE;
823     lens[imdex]    = n;
824     slen          += n;
825     count--;
826   }
827   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
828 
829   /* move the data into the send scatter */
830   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
831   count = 0;
832   for (i=0; i<nrecvs; i++) {
833     values = rvalues + i*nmax;
834     for (j=0; j<lens[i]; j++) {
835       lrows[count++] = values[j] - base;
836     }
837   }
838   ierr = PetscFree(rvalues);CHKERRQ(ierr);
839   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
840   ierr = PetscFree(owner);CHKERRQ(ierr);
841   ierr = PetscFree(nprocs);CHKERRQ(ierr);
842 
843   /* fix right hand side if needed */
844   if (x && b) {
845     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
846     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
847     for (i=0; i<N; i++) {
848       bb[lrows[i]] = diag*xx[lrows[i]];
849     }
850     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
851     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
852   }
853   /*
854         Zero the required rows. If the "diagonal block" of the matrix
855      is square and the user wishes to set the diagonal we use separate
856      code so that MatSetValues() is not called for each diagonal entry, which
857      would allocate new memory, triggering many mallocs and slowing things down.
858 
859   */
860   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
861   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
862   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
863     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
864   } else if (diag != 0.0) {
865     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
866     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
867       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
868 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
869     }
870     for (i = 0; i < slen; i++) {
871       row  = lrows[i] + rstart;
872       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
873     }
874     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
875     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
876   } else {
877     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
878   }
879   ierr = PetscFree(lrows);CHKERRQ(ierr);
880 
881   /* wait on sends */
882   if (nsends) {
883     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
884     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
885     ierr = PetscFree(send_status);CHKERRQ(ierr);
886   }
887   ierr = PetscFree(send_waits);CHKERRQ(ierr);
888   ierr = PetscFree(svalues);CHKERRQ(ierr);
889 
890   PetscFunctionReturn(0);
891 }
892 
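/*
   MatMult_MPIAIJ() below computes yy = A*xx as yy = A_diag*xx + A_off*lvec, where lvec
   holds the needed ghost values of xx; the scatter that fills lvec is overlapped with
   the local diagonal-block multiply.
*/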
893 #undef __FUNCT__
894 #define __FUNCT__ "MatMult_MPIAIJ"
895 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
896 {
897   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
898   PetscErrorCode ierr;
899   PetscInt       nt;
900 
901   PetscFunctionBegin;
902   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
903   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
904   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
906   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
907   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
913 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
914 {
915   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
916   PetscErrorCode ierr;
917 
918   PetscFunctionBegin;
919   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
920   PetscFunctionReturn(0);
921 }
922 
923 #undef __FUNCT__
924 #define __FUNCT__ "MatMultAdd_MPIAIJ"
925 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
926 {
927   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
928   PetscErrorCode ierr;
929 
930   PetscFunctionBegin;
931   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
932   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
933   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
934   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
935   PetscFunctionReturn(0);
936 }
937 
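/*
   MatMultTranspose_MPIAIJ() below computes yy = A^T*xx: the off-diagonal block is
   applied first into lvec, those partial results are scatter-added back to their
   owning processes, and the local transpose multiply supplies the diagonal-block
   contribution; the ordering of the local multiply and the scatter depends on whether
   the scatter begin/end are merged.
*/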
938 #undef __FUNCT__
939 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
940 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
941 {
942   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
943   PetscErrorCode ierr;
944   PetscBool      merged;
945 
946   PetscFunctionBegin;
947   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
948   /* do nondiagonal part */
949   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
950   if (!merged) {
951     /* send it on its way */
952     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
953     /* do local part */
954     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
955     /* receive remote parts: note this assumes the values are not actually */
956     /* added into yy until this VecScatterEnd() call */
957     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
958   } else {
959     /* do local part */
960     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
961     /* send it on its way */
962     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
963     /* values actually were received in the Begin() but we need to call this nop */
964     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
965   }
966   PetscFunctionReturn(0);
967 }
968 
969 EXTERN_C_BEGIN
970 #undef __FUNCT__
971 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
972 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
973 {
974   MPI_Comm       comm;
975   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
976   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
977   IS             Me,Notme;
978   PetscErrorCode ierr;
979   PetscInt       M,N,first,last,*notme,i;
980   PetscMPIInt    size;
981 
982   PetscFunctionBegin;
983 
984   /* Easy test: symmetric diagonal block */
985   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
986   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
987   if (!*f) PetscFunctionReturn(0);
988   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
989   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
990   if (size == 1) PetscFunctionReturn(0);
991 
992   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
993   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
994   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
995   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
996   for (i=0; i<first; i++) notme[i] = i;
997   for (i=last; i<M; i++) notme[i-last+first] = i;
998   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
999   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1000   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1001   Aoff = Aoffs[0];
1002   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1003   Boff = Boffs[0];
1004   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1005   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1006   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1007   ierr = ISDestroy(Me);CHKERRQ(ierr);
1008   ierr = ISDestroy(Notme);CHKERRQ(ierr);
1009 
1010   PetscFunctionReturn(0);
1011 }
1012 EXTERN_C_END
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1016 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1017 {
1018   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1019   PetscErrorCode ierr;
1020 
1021   PetscFunctionBegin;
1022   /* do nondiagonal part */
1023   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1024   /* send it on its way */
1025   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1026   /* do local part */
1027   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1028   /* receive remote parts */
1029   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1030   PetscFunctionReturn(0);
1031 }
1032 
1033 /*
1034   This only works correctly for square matrices where the subblock A->A is the
1035    diagonal block
1036 */
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1039 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1040 {
1041   PetscErrorCode ierr;
1042   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1043 
1044   PetscFunctionBegin;
1045   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1046   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1047   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatScale_MPIAIJ"
1053 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1054 {
1055   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1056   PetscErrorCode ierr;
1057 
1058   PetscFunctionBegin;
1059   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1060   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatDestroy_MPIAIJ"
1066 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1067 {
1068   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1069   PetscErrorCode ierr;
1070 
1071   PetscFunctionBegin;
1072 #if defined(PETSC_USE_LOG)
1073   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1074 #endif
1075   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1076   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
1077   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
1078   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
1079 #if defined (PETSC_USE_CTABLE)
1080   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
1081 #else
1082   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1083 #endif
1084   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1085   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
1086   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
1087   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1088   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1089   ierr = PetscFree(aij);CHKERRQ(ierr);
1090 
1091   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1092   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1093   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1094   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1095   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1096   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1097   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1098   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1099   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
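/*
   MatView_MPIAIJ_Binary() below writes the matrix in the standard PETSc binary format:
   a header (MAT_FILE_CLASSID, global rows, global columns, global nonzero count)
   followed by all row lengths, then all column indices, then all numerical values,
   each gathered to process 0 under flow control and written block by block.
*/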
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1105 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1106 {
1107   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1108   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1109   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1110   PetscErrorCode    ierr;
1111   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1112   int               fd;
1113   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1114   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1115   PetscScalar       *column_values;
1116   PetscInt          message_count,flowcontrolcount;
1117 
1118   PetscFunctionBegin;
1119   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1120   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1121   nz   = A->nz + B->nz;
1122   if (!rank) {
1123     header[0] = MAT_FILE_CLASSID;
1124     header[1] = mat->rmap->N;
1125     header[2] = mat->cmap->N;
1126     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1127     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1128     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1129     /* get largest number of rows any processor has */
1130     rlen = mat->rmap->n;
1131     range = mat->rmap->range;
1132     for (i=1; i<size; i++) {
1133       rlen = PetscMax(rlen,range[i+1] - range[i]);
1134     }
1135   } else {
1136     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1137     rlen = mat->rmap->n;
1138   }
1139 
1140   /* load up the local row counts */
1141   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1142   for (i=0; i<mat->rmap->n; i++) {
1143     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1144   }
1145 
1146   /* store the row lengths to the file */
1147   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1148   if (!rank) {
1149     MPI_Status status;
1150     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1151     for (i=1; i<size; i++) {
1152       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1153       rlen = range[i+1] - range[i];
1154       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1155       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1156     }
1157     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1158   } else {
1159     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1160     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1161     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1162   }
1163   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1164 
1165   /* load up the local column indices */
1166   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1167   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1168   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1169   cnt  = 0;
1170   for (i=0; i<mat->rmap->n; i++) {
1171     for (j=B->i[i]; j<B->i[i+1]; j++) {
1172       if ( (col = garray[B->j[j]]) > cstart) break;
1173       column_indices[cnt++] = col;
1174     }
1175     for (k=A->i[i]; k<A->i[i+1]; k++) {
1176       column_indices[cnt++] = A->j[k] + cstart;
1177     }
1178     for (; j<B->i[i+1]; j++) {
1179       column_indices[cnt++] = garray[B->j[j]];
1180     }
1181   }
1182   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1183 
1184   /* store the column indices to the file */
1185    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1186   if (!rank) {
1187     MPI_Status status;
1188     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1189     for (i=1; i<size; i++) {
1190       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1191       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1192       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
1193       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1194       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1195     }
1196      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1197   } else {
1198     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1199     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1200     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1201     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1202   }
1203   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1204 
1205   /* load up the local column values */
1206   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1207   cnt  = 0;
1208   for (i=0; i<mat->rmap->n; i++) {
1209     for (j=B->i[i]; j<B->i[i+1]; j++) {
1210       if ( garray[B->j[j]] > cstart) break;
1211       column_values[cnt++] = B->a[j];
1212     }
1213     for (k=A->i[i]; k<A->i[i+1]; k++) {
1214       column_values[cnt++] = A->a[k];
1215     }
1216     for (; j<B->i[i+1]; j++) {
1217       column_values[cnt++] = B->a[j];
1218     }
1219   }
1220   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1221 
1222   /* store the column values to the file */
1223    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1224   if (!rank) {
1225     MPI_Status status;
1226     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1227     for (i=1; i<size; i++) {
1228        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1229       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1230       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
1231       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1232       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1233     }
1234     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1235   } else {
1236     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1237     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1238     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1239     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1240   }
1241   ierr = PetscFree(column_values);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1247 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1248 {
1249   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1250   PetscErrorCode    ierr;
1251   PetscMPIInt       rank = aij->rank,size = aij->size;
1252   PetscBool         isdraw,iascii,isbinary;
1253   PetscViewer       sviewer;
1254   PetscViewerFormat format;
1255 
1256   PetscFunctionBegin;
1257   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1258   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1259   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1260   if (iascii) {
1261     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1262     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1263       MatInfo    info;
1264       PetscBool  inodes;
1265 
1266       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1267       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1268       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1269       if (!inodes) {
1270         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1271 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1272       } else {
1273         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1274 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1275       }
1276       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1277       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1278       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1279       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1280       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1281       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1282       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1283       PetscFunctionReturn(0);
1284     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1285       PetscInt   inodecount,inodelimit,*inodes;
1286       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1287       if (inodes) {
1288         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1289       } else {
1290         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1291       }
1292       PetscFunctionReturn(0);
1293     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1294       PetscFunctionReturn(0);
1295     }
1296   } else if (isbinary) {
1297     if (size == 1) {
1298       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1299       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1300     } else {
1301       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1302     }
1303     PetscFunctionReturn(0);
1304   } else if (isdraw) {
1305     PetscDraw  draw;
1306     PetscBool  isnull;
1307     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1308     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1309   }
1310 
1311   if (size == 1) {
1312     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1313     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1314   } else {
1315     /* assemble the entire matrix onto first processor. */
1316     Mat         A;
1317     Mat_SeqAIJ  *Aloc;
1318     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1319     MatScalar   *a;
1320 
1321     if (mat->rmap->N > 1024) {
1322       PetscBool  flg = PETSC_FALSE;
1323 
1324       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1325       if (!flg) {
1326         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1327       }
1328     }
1329 
1330     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1331     if (!rank) {
1332       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1333     } else {
1334       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1335     }
1336     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1337     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1338     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1339     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1340 
1341     /* copy over the A part */
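    /* The diagonal block stores local column indices; shift them by cmap->rstart to
       global numbering before calling MatSetValues(), and shift them back once all
       local rows have been inserted. */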
1342     Aloc = (Mat_SeqAIJ*)aij->A->data;
1343     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1344     row = mat->rmap->rstart;
1345     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1346     for (i=0; i<m; i++) {
1347       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1348       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1349     }
1350     aj = Aloc->j;
1351     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1352 
1353     /* copy over the B part */
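    /* The off-diagonal block uses compressed local column indices; translate them to
       global column numbers through aij->garray into a temporary array cols. */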
1354     Aloc = (Mat_SeqAIJ*)aij->B->data;
1355     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1356     row  = mat->rmap->rstart;
1357     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1358     ct   = cols;
1359     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1360     for (i=0; i<m; i++) {
1361       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1362       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1363     }
1364     ierr = PetscFree(ct);CHKERRQ(ierr);
1365     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1366     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1367     /*
1368        Everyone has to call to draw the matrix since the graphics waits are
1369        synchronized across all processors that share the PetscDraw object
1370     */
1371     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1372     if (!rank) {
1373       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1374       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1375       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1376       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1377     }
1378     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1379     ierr = MatDestroy(A);CHKERRQ(ierr);
1380   }
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatView_MPIAIJ"
1386 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1387 {
1388   PetscErrorCode ierr;
1389   PetscBool      iascii,isdraw,issocket,isbinary;
1390 
1391   PetscFunctionBegin;
1392   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1393   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1394   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1395   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1396   if (iascii || isdraw || isbinary || issocket) {
1397     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1398   } else {
1399     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatSOR_MPIAIJ"
1406 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1407 {
1408   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1409   PetscErrorCode ierr;
1410   Vec            bb1 = 0;
1411   PetscBool      hasop;
1412 
1413   PetscFunctionBegin;
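  /* A work vector bb1 is needed whenever the right-hand side has to be modified:
     more than one outer iteration, a nonzero initial guess (the SOR_ZERO_INITIAL_GUESS
     bit is not set), or Eisenstat's trick. */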
1414   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1415     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1416   }
1417 
1418   if (flag == SOR_APPLY_UPPER) {
1419     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1420     PetscFunctionReturn(0);
1421   }
1422 
1423   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
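    /* Local (processor-block) SOR: each outer iteration scatters the current solution
       into the ghost vector lvec, moves the off-diagonal contribution to the right-hand
       side as bb1 = bb - B*lvec, and then runs lits sweeps of sequential SOR on the
       diagonal block A. */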
1424     if (flag & SOR_ZERO_INITIAL_GUESS) {
1425       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1426       its--;
1427     }
1428 
1429     while (its--) {
1430       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1431       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1432 
1433       /* update rhs: bb1 = bb - B*x */
1434       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1435       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1436 
1437       /* local sweep */
1438       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1439     }
1440   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1441     if (flag & SOR_ZERO_INITIAL_GUESS) {
1442       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1443       its--;
1444     }
1445     while (its--) {
1446       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1447       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1448 
1449       /* update rhs: bb1 = bb - B*x */
1450       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1451       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1452 
1453       /* local sweep */
1454       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1455     }
1456   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1457     if (flag & SOR_ZERO_INITIAL_GUESS) {
1458       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1459       its--;
1460     }
1461     while (its--) {
1462       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1463       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1464 
1465       /* update rhs: bb1 = bb - B*x */
1466       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1467       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1468 
1469       /* local sweep */
1470       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1471     }
1472   }  else if (flag & SOR_EISENSTAT) {
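    /* Eisenstat's trick (local variant): a backward sweep on the diagonal block produces
       an initial xx; the right-hand side is then corrected using the diagonal,
       bb1 = ((omega-2)/omega)*D*xx + bb + B*lvec, and a forward sweep on bb1 yields the
       update xx1 that is added to xx. */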
1473     Vec         xx1;
1474 
1475     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1476     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1477 
1478     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1479     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1480     if (!mat->diag) {
1481       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1482       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1483     }
1484     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1485     if (hasop) {
1486       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1487     } else {
1488       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1489     }
1490     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1491 
1492     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1493 
1494     /* local sweep */
1495     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1496     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1497     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1498   } else {
1499     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1500   }
1501 
1502   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #undef __FUNCT__
1507 #define __FUNCT__ "MatPermute_MPIAIJ"
1508 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1509 {
1510   MPI_Comm       comm,pcomm;
1511   PetscInt       first,local_size,nrows;
1512   const PetscInt *rows;
1513   PetscMPIInt    size;
1514   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1515   PetscErrorCode ierr;
1516 
1517   PetscFunctionBegin;
1518   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1519   /* make a collective version of 'rowp' */
1520   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1521   if (pcomm==comm) {
1522     crowp = rowp;
1523   } else {
1524     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1525     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1526     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1527     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1528   }
1529   /* collect the global row permutation and invert it */
1530   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1531   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1532   if (pcomm!=comm) {
1533     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1534   }
1535   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1536   /* get the local target indices */
1537   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1538   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1539   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1540   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1541   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1542   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1543   /* the column permutation is simpler:
1544      make a local (sequential) version of 'colp' and invert it */
1545   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1546   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1547   if (size==1) {
1548     lcolp = colp;
1549   } else {
1550     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1551     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1552     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1553   }
1554   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1555   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1556   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1557   if (size>1) {
1558     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1559     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1560   }
1561   /* now we just get the submatrix */
1562   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1563   /* clean up */
1564   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1565   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1566   PetscFunctionReturn(0);
1567 }
1568 
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1571 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1572 {
1573   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1574   Mat            A = mat->A,B = mat->B;
1575   PetscErrorCode ierr;
1576   PetscReal      isend[5],irecv[5];
1577 
1578   PetscFunctionBegin;
1579   info->block_size     = 1.0;
1580   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1581   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1582   isend[3] = info->memory;  isend[4] = info->mallocs;
1583   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1584   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1585   isend[3] += info->memory;  isend[4] += info->mallocs;
1586   if (flag == MAT_LOCAL) {
1587     info->nz_used      = isend[0];
1588     info->nz_allocated = isend[1];
1589     info->nz_unneeded  = isend[2];
1590     info->memory       = isend[3];
1591     info->mallocs      = isend[4];
1592   } else if (flag == MAT_GLOBAL_MAX) {
1593     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1594     info->nz_used      = irecv[0];
1595     info->nz_allocated = irecv[1];
1596     info->nz_unneeded  = irecv[2];
1597     info->memory       = irecv[3];
1598     info->mallocs      = irecv[4];
1599   } else if (flag == MAT_GLOBAL_SUM) {
1600     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1601     info->nz_used      = irecv[0];
1602     info->nz_allocated = irecv[1];
1603     info->nz_unneeded  = irecv[2];
1604     info->memory       = irecv[3];
1605     info->mallocs      = irecv[4];
1606   }
1607   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1608   info->fill_ratio_needed = 0;
1609   info->factor_mallocs    = 0;
1610 
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatSetOption_MPIAIJ"
1616 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1617 {
1618   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1619   PetscErrorCode ierr;
1620 
1621   PetscFunctionBegin;
1622   switch (op) {
1623   case MAT_NEW_NONZERO_LOCATIONS:
1624   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1625   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1626   case MAT_KEEP_NONZERO_PATTERN:
1627   case MAT_NEW_NONZERO_LOCATION_ERR:
1628   case MAT_USE_INODES:
1629   case MAT_IGNORE_ZERO_ENTRIES:
1630     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1631     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1632     break;
1633   case MAT_ROW_ORIENTED:
1634     a->roworiented = flg;
1635     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1636     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1637     break;
1638   case MAT_NEW_DIAGONALS:
1639     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1640     break;
1641   case MAT_IGNORE_OFF_PROC_ENTRIES:
1642     a->donotstash = PETSC_TRUE;
1643     break;
1644   case MAT_SPD:
1645     A->spd_set                         = PETSC_TRUE;
1646     A->spd                             = flg;
1647     if (flg) {
1648       A->symmetric                     = PETSC_TRUE;
1649       A->structurally_symmetric        = PETSC_TRUE;
1650       A->symmetric_set                 = PETSC_TRUE;
1651       A->structurally_symmetric_set    = PETSC_TRUE;
1652     }
1653     break;
1654   case MAT_SYMMETRIC:
1655     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1656     break;
1657   case MAT_STRUCTURALLY_SYMMETRIC:
1658     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1659     break;
1660   case MAT_HERMITIAN:
1661     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1662     break;
1663   case MAT_SYMMETRY_ETERNAL:
1664     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1665     break;
1666   default:
1667     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1668   }
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 #undef __FUNCT__
1673 #define __FUNCT__ "MatGetRow_MPIAIJ"
1674 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1675 {
1676   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1677   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1678   PetscErrorCode ierr;
1679   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1680   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1681   PetscInt       *cmap,*idx_p;
1682 
1683   PetscFunctionBegin;
1684   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1685   mat->getrowactive = PETSC_TRUE;
1686 
1687   if (!mat->rowvalues && (idx || v)) {
1688     /*
1689         allocate enough space to hold information from the longest row.
1690     */
1691     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1692     PetscInt   max = 1,tmp;
1693     for (i=0; i<matin->rmap->n; i++) {
1694       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1695       if (max < tmp) { max = tmp; }
1696     }
1697     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1698   }
1699 
1700   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1701   lrow = row - rstart;
1702 
1703   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1704   if (!v)   {pvA = 0; pvB = 0;}
1705   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1706   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1707   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1708   nztot = nzA + nzB;
1709 
1710   cmap  = mat->garray;
1711   if (v  || idx) {
1712     if (nztot) {
1713       /* Sort by increasing column numbers, assuming A and B already sorted */
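      /* The merged row is assembled in three pieces: off-diagonal (B) entries whose
         global column is below cstart, then all diagonal (A) entries shifted by cstart,
         then the remaining B entries; imark records where the split in the B row occurs. */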
1714       PetscInt imark = -1;
1715       if (v) {
1716         *v = v_p = mat->rowvalues;
1717         for (i=0; i<nzB; i++) {
1718           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1719           else break;
1720         }
1721         imark = i;
1722         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1723         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1724       }
1725       if (idx) {
1726         *idx = idx_p = mat->rowindices;
1727         if (imark > -1) {
1728           for (i=0; i<imark; i++) {
1729             idx_p[i] = cmap[cworkB[i]];
1730           }
1731         } else {
1732           for (i=0; i<nzB; i++) {
1733             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1734             else break;
1735           }
1736           imark = i;
1737         }
1738         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1739         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1740       }
1741     } else {
1742       if (idx) *idx = 0;
1743       if (v)   *v   = 0;
1744     }
1745   }
1746   *nz = nztot;
1747   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1748   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 #undef __FUNCT__
1753 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1754 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1755 {
1756   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1757 
1758   PetscFunctionBegin;
1759   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1760   aij->getrowactive = PETSC_FALSE;
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatNorm_MPIAIJ"
1766 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1767 {
1768   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1769   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1770   PetscErrorCode ierr;
1771   PetscInt       i,j,cstart = mat->cmap->rstart;
1772   PetscReal      sum = 0.0;
1773   MatScalar      *v;
1774 
1775   PetscFunctionBegin;
1776   if (aij->size == 1) {
1777     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1778   } else {
1779     if (type == NORM_FROBENIUS) {
1780       v = amat->a;
1781       for (i=0; i<amat->nz; i++) {
1782 #if defined(PETSC_USE_COMPLEX)
1783         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1784 #else
1785         sum += (*v)*(*v); v++;
1786 #endif
1787       }
1788       v = bmat->a;
1789       for (i=0; i<bmat->nz; i++) {
1790 #if defined(PETSC_USE_COMPLEX)
1791         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1792 #else
1793         sum += (*v)*(*v); v++;
1794 #endif
1795       }
1796       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1797       *norm = sqrt(*norm);
1798     } else if (type == NORM_1) { /* max column norm */
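      /* Accumulate the absolute column sums locally: diagonal-block entries go to global
         column cstart + j, off-diagonal entries are mapped through garray. The sums are
         then added across all processes and the largest one is taken. */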
1799       PetscReal *tmp,*tmp2;
1800       PetscInt  *jj,*garray = aij->garray;
1801       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1802       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1803       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1804       *norm = 0.0;
1805       v = amat->a; jj = amat->j;
1806       for (j=0; j<amat->nz; j++) {
1807         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1808       }
1809       v = bmat->a; jj = bmat->j;
1810       for (j=0; j<bmat->nz; j++) {
1811         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1812       }
1813       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1814       for (j=0; j<mat->cmap->N; j++) {
1815         if (tmp2[j] > *norm) *norm = tmp2[j];
1816       }
1817       ierr = PetscFree(tmp);CHKERRQ(ierr);
1818       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1819     } else if (type == NORM_INFINITY) { /* max row norm */
1820       PetscReal ntemp = 0.0;
1821       for (j=0; j<aij->A->rmap->n; j++) {
1822         v = amat->a + amat->i[j];
1823         sum = 0.0;
1824         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1825           sum += PetscAbsScalar(*v); v++;
1826         }
1827         v = bmat->a + bmat->i[j];
1828         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1829           sum += PetscAbsScalar(*v); v++;
1830         }
1831         if (sum > ntemp) ntemp = sum;
1832       }
1833       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1834     } else {
1835       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1836     }
1837   }
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 #undef __FUNCT__
1842 #define __FUNCT__ "MatTranspose_MPIAIJ"
1843 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1844 {
1845   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1846   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1847   PetscErrorCode ierr;
1848   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1849   PetscInt       cstart=A->cmap->rstart,ncol;
1850   Mat            B;
1851   MatScalar      *array;
1852 
1853   PetscFunctionBegin;
1854   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1855 
1856   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1857   ai = Aloc->i; aj = Aloc->j;
1858   bi = Bloc->i; bj = Bloc->j;
1859   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1860     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1861     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1862     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1863     for (i=0; i<ai[ma]; i++){
1864       d_nnz[aj[i]] ++;
1865       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1866     }
1867 
1868     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1869     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1870     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1871     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1872     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1873   } else {
1874     B = *matout;
1875   }
1876 
1877   /* copy over the A part */
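  /* Each local row of A becomes a column of the transpose: MatSetValues() is called with
     the row's (now global) column indices acting as row indices of B and the single global
     row index acting as the column index. */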
1878   array = Aloc->a;
1879   row = A->rmap->rstart;
1880   for (i=0; i<ma; i++) {
1881     ncol = ai[i+1]-ai[i];
1882     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1883     row++; array += ncol; aj += ncol;
1884   }
1885   aj = Aloc->j;
1886   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
1887 
1888   /* copy over the B part */
1889   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1890   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1891   array = Bloc->a;
1892   row = A->rmap->rstart;
1893   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1894   cols_tmp = cols;
1895   for (i=0; i<mb; i++) {
1896     ncol = bi[i+1]-bi[i];
1897     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1898     row++; array += ncol; cols_tmp += ncol;
1899   }
1900   ierr = PetscFree(cols);CHKERRQ(ierr);
1901 
1902   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1903   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1904   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1905     *matout = B;
1906   } else {
1907     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1908   }
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1914 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1915 {
1916   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1917   Mat            a = aij->A,b = aij->B;
1918   PetscErrorCode ierr;
1919   PetscInt       s1,s2,s3;
1920 
1921   PetscFunctionBegin;
1922   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1923   if (rr) {
1924     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1925     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1926     /* Overlap communication with computation. */
1927     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1928   }
1929   if (ll) {
1930     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1931     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1932     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1933   }
1934   /* scale the diagonal block */
1935   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1936 
1937   if (rr) {
1938     /* Do a scatter end and then right scale the off-diagonal block */
1939     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1940     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1941   }
1942 
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1948 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1949 {
1950   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1951   PetscErrorCode ierr;
1952 
1953   PetscFunctionBegin;
1954   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1955   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1956   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1957   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1962 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1963 {
1964   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1965   PetscErrorCode ierr;
1966 
1967   PetscFunctionBegin;
1968   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatEqual_MPIAIJ"
1974 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
1975 {
1976   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1977   Mat            a,b,c,d;
1978   PetscBool      flg;
1979   PetscErrorCode ierr;
1980 
1981   PetscFunctionBegin;
1982   a = matA->A; b = matA->B;
1983   c = matB->A; d = matB->B;
1984 
1985   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1986   if (flg) {
1987     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1988   }
1989   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 #undef __FUNCT__
1994 #define __FUNCT__ "MatCopy_MPIAIJ"
1995 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1996 {
1997   PetscErrorCode ierr;
1998   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1999   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2000 
2001   PetscFunctionBegin;
2002   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2003   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2004     /* Because of the column compression in the off-process part a->B,
2005        the number of columns in a->B and b->B may differ, so MatCopy() cannot be
2006        called directly on the two parts. If needed, a copy more efficient than
2007        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2008        and then copying the submatrices. */
2009     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2010   } else {
2011     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2012     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2013   }
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 #undef __FUNCT__
2018 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2019 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2020 {
2021   PetscErrorCode ierr;
2022 
2023   PetscFunctionBegin;
2024   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2025   PetscFunctionReturn(0);
2026 }
2027 
2028 #undef __FUNCT__
2029 #define __FUNCT__ "MatAXPY_MPIAIJ"
2030 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2031 {
2032   PetscErrorCode ierr;
2033   PetscInt       i;
2034   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2035   PetscBLASInt   bnz,one=1;
2036   Mat_SeqAIJ     *x,*y;
2037 
2038   PetscFunctionBegin;
2039   if (str == SAME_NONZERO_PATTERN) {
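  /* Three cases: with an identical nonzero pattern the value arrays of the diagonal and
     off-diagonal blocks can be combined directly with BLAS axpy; with a subset pattern an
     index map (xtoy) from X's nonzeros into Y's is built and reused; otherwise a new matrix
     with the merged pattern is preallocated, filled, and swapped into Y. */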
2040     PetscScalar alpha = a;
2041     x = (Mat_SeqAIJ *)xx->A->data;
2042     y = (Mat_SeqAIJ *)yy->A->data;
2043     bnz = PetscBLASIntCast(x->nz);
2044     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2045     x = (Mat_SeqAIJ *)xx->B->data;
2046     y = (Mat_SeqAIJ *)yy->B->data;
2047     bnz = PetscBLASIntCast(x->nz);
2048     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2049   } else if (str == SUBSET_NONZERO_PATTERN) {
2050     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2051 
2052     x = (Mat_SeqAIJ *)xx->B->data;
2053     y = (Mat_SeqAIJ *)yy->B->data;
2054     if (y->xtoy && y->XtoY != xx->B) {
2055       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2056       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2057     }
2058     if (!y->xtoy) { /* get xtoy */
2059       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2060       y->XtoY = xx->B;
2061       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2062     }
2063     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2064   } else {
2065     Mat B;
2066     PetscInt *nnz_d,*nnz_o;
2067     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2068     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2069     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2070     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2071     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2072     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2073     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2074     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2075     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2076     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2077     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2078     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2079     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2080   }
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
2085 
2086 #undef __FUNCT__
2087 #define __FUNCT__ "MatConjugate_MPIAIJ"
2088 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
2089 {
2090 #if defined(PETSC_USE_COMPLEX)
2091   PetscErrorCode ierr;
2092   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2096   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2097 #else
2098   PetscFunctionBegin;
2099 #endif
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 #undef __FUNCT__
2104 #define __FUNCT__ "MatRealPart_MPIAIJ"
2105 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2106 {
2107   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2108   PetscErrorCode ierr;
2109 
2110   PetscFunctionBegin;
2111   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2112   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2113   PetscFunctionReturn(0);
2114 }
2115 
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2118 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2119 {
2120   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2121   PetscErrorCode ierr;
2122 
2123   PetscFunctionBegin;
2124   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2125   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 #if defined(PETSC_HAVE_PBGL)
2130 
2131 #include <boost/parallel/mpi/bsp_process_group.hpp>
2132 #include <boost/graph/distributed/ilu_default_graph.hpp>
2133 #include <boost/graph/distributed/ilu_0_block.hpp>
2134 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2135 #include <boost/graph/distributed/petsc/interface.hpp>
2136 #include <boost/multi_array.hpp>
2137 #include <boost/parallel/distributed_property_map.hpp>
2138 
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2141 /*
2142   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2143 */
2144 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2145 {
2146   namespace petsc = boost::distributed::petsc;
2147 
2148   namespace graph_dist = boost::graph::distributed;
2149   using boost::graph::distributed::ilu_default::process_group_type;
2150   using boost::graph::ilu_permuted;
2151 
2152   PetscBool       row_identity, col_identity;
2153   PetscContainer  c;
2154   PetscInt        m, n, M, N;
2155   PetscErrorCode  ierr;
2156 
2157   PetscFunctionBegin;
2158   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2159   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2160   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2161   if (!row_identity || !col_identity) {
2162     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2163   }
2164 
2165   process_group_type pg;
2166   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2167   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2168   lgraph_type&   level_graph = *lgraph_p;
2169   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2170 
2171   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2172   ilu_permuted(level_graph);
2173 
2174   /* put together the new matrix */
2175   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2176   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2177   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2178   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2179   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2180   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2181   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2182 
2183   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2184   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2185   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2191 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2192 {
2193   PetscFunctionBegin;
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 #undef __FUNCT__
2198 #define __FUNCT__ "MatSolve_MPIAIJ"
2199 /*
2200   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2201 */
2202 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2203 {
2204   namespace graph_dist = boost::graph::distributed;
2205 
2206   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2207   lgraph_type*   lgraph_p;
2208   PetscContainer c;
2209   PetscErrorCode ierr;
2210 
2211   PetscFunctionBegin;
2212   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2213   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2214   ierr = VecCopy(b, x);CHKERRQ(ierr);
2215 
2216   PetscScalar* array_x;
2217   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2218   PetscInt sx;
2219   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2220 
2221   PetscScalar* array_b;
2222   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2223   PetscInt sb;
2224   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2225 
2226   lgraph_type&   level_graph = *lgraph_p;
2227   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2228 
2229   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2230   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2231                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2232 
2233   typedef boost::iterator_property_map<array_ref_type::iterator,
2234                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2235   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2236                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2237 
2238   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2239 
2240   PetscFunctionReturn(0);
2241 }
2242 #endif
2243 
2244 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2245   PetscInt       nzlocal,nsends,nrecvs;
2246   PetscMPIInt    *send_rank,*recv_rank;
2247   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2248   PetscScalar    *sbuf_a,**rbuf_a;
2249   PetscErrorCode (*MatDestroy)(Mat);
2250 } Mat_Redundant;
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2254 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2255 {
2256   PetscErrorCode       ierr;
2257   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2258   PetscInt             i;
2259 
2260   PetscFunctionBegin;
2261   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2262   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2263   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2264   for (i=0; i<redund->nrecvs; i++){
2265     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2266     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2267   }
2268   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2269   ierr = PetscFree(redund);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatDestroy_MatRedundant"
2275 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2276 {
2277   PetscErrorCode  ierr;
2278   PetscContainer  container;
2279   Mat_Redundant   *redund=PETSC_NULL;
2280 
2281   PetscFunctionBegin;
2282   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2283   if (container) {
2284     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2285   } else {
2286     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2287   }
2288   A->ops->destroy = redund->MatDestroy;
2289   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2290   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2291   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2297 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2298 {
2299   PetscMPIInt    rank,size;
2300   MPI_Comm       comm=((PetscObject)mat)->comm;
2301   PetscErrorCode ierr;
2302   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2303   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2304   PetscInt       *rowrange=mat->rmap->range;
2305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2306   Mat            A=aij->A,B=aij->B,C=*matredundant;
2307   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2308   PetscScalar    *sbuf_a;
2309   PetscInt       nzlocal=a->nz+b->nz;
2310   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2311   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2312   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2313   MatScalar      *aworkA,*aworkB;
2314   PetscScalar    *vals;
2315   PetscMPIInt    tag1,tag2,tag3,imdex;
2316   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2317                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2318   MPI_Status     recv_status,*send_status;
2319   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2320   PetscInt       **rbuf_j=PETSC_NULL;
2321   PetscScalar    **rbuf_a=PETSC_NULL;
2322   Mat_Redundant  *redund=PETSC_NULL;
2323   PetscContainer container;
2324 
2325   PetscFunctionBegin;
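  /* Builds a complete copy of the parallel matrix on each subcommunicator: every process
     packs its local rows (row offsets, global column indices, and values) into send
     buffers, exchanges them with its peer processes in the other subcommunicators, and
     assembles the result into C. */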
2326   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2327   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2328 
2329   if (reuse == MAT_REUSE_MATRIX) {
2330     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2331     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2332     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2333     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2334     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2335     if (container) {
2336       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2337     } else {
2338       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2339     }
2340     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2341 
2342     nsends    = redund->nsends;
2343     nrecvs    = redund->nrecvs;
2344     send_rank = redund->send_rank;
2345     recv_rank = redund->recv_rank;
2346     sbuf_nz   = redund->sbuf_nz;
2347     rbuf_nz   = redund->rbuf_nz;
2348     sbuf_j    = redund->sbuf_j;
2349     sbuf_a    = redund->sbuf_a;
2350     rbuf_j    = redund->rbuf_j;
2351     rbuf_a    = redund->rbuf_a;
2352   }
2353 
2354   if (reuse == MAT_INITIAL_MATRIX){
2355     PetscMPIInt  subrank,subsize;
2356     PetscInt     nleftover,np_subcomm;
2357     /* get the destination processors' id send_rank, nsends and nrecvs */
2358     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2359     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2360     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2361     np_subcomm = size/nsubcomm;
2362     nleftover  = size - nsubcomm*np_subcomm;
2363     nsends = 0; nrecvs = 0;
2364     for (i=0; i<size; i++){ /* i=rank*/
2365       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2366         send_rank[nsends] = i; nsends++;
2367         recv_rank[nrecvs++] = i;
2368       }
2369     }
2370     if (rank >= size - nleftover){/* this proc is a leftover processor */
2371       i = size-nleftover-1;
2372       j = 0;
2373       while (j < nsubcomm - nleftover){
2374         send_rank[nsends++] = i;
2375         i--; j++;
2376       }
2377     }
2378 
2379     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2380       for (i=0; i<nleftover; i++){
2381         recv_rank[nrecvs++] = size-nleftover+i;
2382       }
2383     }
2384 
2385     /* allocate sbuf_j, sbuf_a */
2386     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2387     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2388     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2389   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2390 
2391   /* copy mat's local entries into the buffers */
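  /* sbuf_j layout: the first rend-rstart+1 entries are CSR-style row offsets (rptr),
     followed by the global column indices of every local row; sbuf_a holds the matching
     numerical values. Each row is written with the B-part columns below cstart first,
     then the A-part columns, then the remaining B-part columns, so the column indices
     come out sorted. */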
2392   if (reuse == MAT_INITIAL_MATRIX){
2393     rownz_max = 0;
2394     rptr = sbuf_j;
2395     cols = sbuf_j + rend-rstart + 1;
2396     vals = sbuf_a;
2397     rptr[0] = 0;
2398     for (i=0; i<rend-rstart; i++){
2399       row = i + rstart;
2400       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2401       ncols  = nzA + nzB;
2402       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2403       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2404       /* load the column indices for this row into cols */
2405       lwrite = 0;
2406       for (l=0; l<nzB; l++) {
2407         if ((ctmp = bmap[cworkB[l]]) < cstart){
2408           vals[lwrite]   = aworkB[l];
2409           cols[lwrite++] = ctmp;
2410         }
2411       }
2412       for (l=0; l<nzA; l++){
2413         vals[lwrite]   = aworkA[l];
2414         cols[lwrite++] = cstart + cworkA[l];
2415       }
2416       for (l=0; l<nzB; l++) {
2417         if ((ctmp = bmap[cworkB[l]]) >= cend){
2418           vals[lwrite]   = aworkB[l];
2419           cols[lwrite++] = ctmp;
2420         }
2421       }
2422       vals += ncols;
2423       cols += ncols;
2424       rptr[i+1] = rptr[i] + ncols;
2425       if (rownz_max < ncols) rownz_max = ncols;
2426     }
2427     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2428   } else { /* only copy matrix values into sbuf_a */
2429     rptr = sbuf_j;
2430     vals = sbuf_a;
2431     rptr[0] = 0;
2432     for (i=0; i<rend-rstart; i++){
2433       row = i + rstart;
2434       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2435       ncols  = nzA + nzB;
2436       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2437       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2438       lwrite = 0;
2439       for (l=0; l<nzB; l++) {
2440         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2441       }
2442       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2443       for (l=0; l<nzB; l++) {
2444         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2445       }
2446       vals += ncols;
2447       rptr[i+1] = rptr[i] + ncols;
2448     }
2449   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2450 
2451   /* send nzlocal to others, and recv other's nzlocal */
2452   /*--------------------------------------------------*/
2453   if (reuse == MAT_INITIAL_MATRIX){
2454     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2455     s_waits2 = s_waits3 + nsends;
2456     s_waits1 = s_waits2 + nsends;
2457     r_waits1 = s_waits1 + nsends;
2458     r_waits2 = r_waits1 + nrecvs;
2459     r_waits3 = r_waits2 + nrecvs;
2460   } else {
2461     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2462     r_waits3 = s_waits3 + nsends;
2463   }
2464 
2465   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2466   if (reuse == MAT_INITIAL_MATRIX){
2467     /* get new tags to keep the communication clean */
2468     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2469     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2470     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2471 
2472     /* post receives of other's nzlocal */
2473     for (i=0; i<nrecvs; i++){
2474       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2475     }
2476     /* send nzlocal to others */
2477     for (i=0; i<nsends; i++){
2478       sbuf_nz[i] = nzlocal;
2479       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2480     }
2481     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2482     count = nrecvs;
2483     while (count) {
2484       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2485       recv_rank[imdex] = recv_status.MPI_SOURCE;
2486       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2487       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2488 
2489       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2490       rbuf_nz[imdex] += i + 2;
2491       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2492       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2493       count--;
2494     }
2495     /* wait on sends of nzlocal */
2496     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2497     /* send mat->i,j to others, and recv from other's */
2498     /*------------------------------------------------*/
2499     for (i=0; i<nsends; i++){
2500       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2501       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2502     }
2503     /* wait on receives of mat->i,j */
2504     /*------------------------------*/
2505     count = nrecvs;
2506     while (count) {
2507       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2508       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2509       count--;
2510     }
2511     /* wait on sends of mat->i,j */
2512     /*---------------------------*/
2513     if (nsends) {
2514       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2515     }
2516   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2517 
2518   /* post receives, send and receive mat->a */
2519   /*----------------------------------------*/
2520   for (imdex=0; imdex<nrecvs; imdex++) {
2521     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2522   }
2523   for (i=0; i<nsends; i++){
2524     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2525   }
2526   count = nrecvs;
2527   while (count) {
2528     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2529     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2530     count--;
2531   }
2532   if (nsends) {
2533     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2534   }
2535 
2536   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2537 
2538   /* create redundant matrix */
2539   /*-------------------------*/
2540   if (reuse == MAT_INITIAL_MATRIX){
2541     /* compute rownz_max for preallocation */
2542     for (imdex=0; imdex<nrecvs; imdex++){
2543       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2544       rptr = rbuf_j[imdex];
2545       for (i=0; i<j; i++){
2546         ncols = rptr[i+1] - rptr[i];
2547         if (rownz_max < ncols) rownz_max = ncols;
2548       }
2549     }
2550 
2551     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2552     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2553     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2554     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2555     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2556   } else {
2557     C = *matredundant;
2558   }
2559 
2560   /* insert local matrix entries */
2561   rptr = sbuf_j;
2562   cols = sbuf_j + rend-rstart + 1;
2563   vals = sbuf_a;
2564   for (i=0; i<rend-rstart; i++){
2565     row   = i + rstart;
2566     ncols = rptr[i+1] - rptr[i];
2567     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2568     vals += ncols;
2569     cols += ncols;
2570   }
2571   /* insert received matrix entries */
2572   for (imdex=0; imdex<nrecvs; imdex++){
2573     rstart = rowrange[recv_rank[imdex]];
2574     rend   = rowrange[recv_rank[imdex]+1];
2575     rptr = rbuf_j[imdex];
2576     cols = rbuf_j[imdex] + rend-rstart + 1;
2577     vals = rbuf_a[imdex];
2578     for (i=0; i<rend-rstart; i++){
2579       row   = i + rstart;
2580       ncols = rptr[i+1] - rptr[i];
2581       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2582       vals += ncols;
2583       cols += ncols;
2584     }
2585   }
2586   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2587   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2588   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2589   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %D != input mat size %D",M,mat->rmap->N);
2590   if (reuse == MAT_INITIAL_MATRIX){
2591     PetscContainer container;
2592     *matredundant = C;
2593     /* create a supporting struct and attach it to C for reuse */
2594     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2595     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2596     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2597     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2598     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2599 
2600     redund->nzlocal = nzlocal;
2601     redund->nsends  = nsends;
2602     redund->nrecvs  = nrecvs;
2603     redund->send_rank = send_rank;
2604     redund->recv_rank = recv_rank;
2605     redund->sbuf_nz = sbuf_nz;
2606     redund->rbuf_nz = rbuf_nz;
2607     redund->sbuf_j  = sbuf_j;
2608     redund->sbuf_a  = sbuf_a;
2609     redund->rbuf_j  = rbuf_j;
2610     redund->rbuf_a  = rbuf_a;
2611 
2612     redund->MatDestroy = C->ops->destroy;
2613     C->ops->destroy    = MatDestroy_MatRedundant;
2614   }
2615   PetscFunctionReturn(0);
2616 }
2617 
2618 #undef __FUNCT__
2619 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2620 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2621 {
2622   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2623   PetscErrorCode ierr;
2624   PetscInt       i,*idxb = 0;
2625   PetscScalar    *va,*vb;
2626   Vec            vtmp;
2627 
2628   PetscFunctionBegin;
2629   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2630   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2631   if (idx) {
2632     for (i=0; i<A->rmap->n; i++) {
2633       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2634     }
2635   }
2636 
2637   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2638   if (idx) {
2639     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2640   }
2641   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2642   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2643 
2644   for (i=0; i<A->rmap->n; i++){
2645     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2646       va[i] = vb[i];
2647       if (idx) idx[i] = a->garray[idxb[i]];
2648     }
2649   }
2650 
2651   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2652   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2653   if (idxb) {
2654     ierr = PetscFree(idxb);CHKERRQ(ierr);
2655   }
2656   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2657   PetscFunctionReturn(0);
2658 }
2659 
2660 #undef __FUNCT__
2661 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2662 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2663 {
2664   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2665   PetscErrorCode ierr;
2666   PetscInt       i,*idxb = 0;
2667   PetscScalar    *va,*vb;
2668   Vec            vtmp;
2669 
2670   PetscFunctionBegin;
2671   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2672   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2673   if (idx) {
2674     for (i=0; i<A->rmap->n; i++) {
2675       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2676     }
2677   }
2678 
2679   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2680   if (idx) {
2681     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2682   }
2683   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2684   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2685 
2686   for (i=0; i<A->rmap->n; i++){
2687     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2688       va[i] = vb[i];
2689       if (idx) idx[i] = a->garray[idxb[i]];
2690     }
2691   }
2692 
2693   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2694   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2695   if (idxb) {
2696     ierr = PetscFree(idxb);CHKERRQ(ierr);
2697   }
2698   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2699   PetscFunctionReturn(0);
2700 }
2701 
2702 #undef __FUNCT__
2703 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2704 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2705 {
2706   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2707   PetscInt       n      = A->rmap->n;
2708   PetscInt       cstart = A->cmap->rstart;
2709   PetscInt      *cmap   = mat->garray;
2710   PetscInt      *diagIdx, *offdiagIdx;
2711   Vec            diagV, offdiagV;
2712   PetscScalar   *a, *diagA, *offdiagA;
2713   PetscInt       r;
2714   PetscErrorCode ierr;
2715 
2716   PetscFunctionBegin;
2717   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2718   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2719   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2720   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2721   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2722   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2723   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2724   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2725   for(r = 0; r < n; ++r) {
2726     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2727       a[r]   = diagA[r];
2728       idx[r] = cstart + diagIdx[r];
2729     } else {
2730       a[r]   = offdiagA[r];
2731       idx[r] = cmap[offdiagIdx[r]];
2732     }
2733   }
2734   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2735   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2736   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2737   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2738   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2739   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 #undef __FUNCT__
2744 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2745 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2746 {
2747   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2748   PetscInt       n      = A->rmap->n;
2749   PetscInt       cstart = A->cmap->rstart;
2750   PetscInt      *cmap   = mat->garray;
2751   PetscInt      *diagIdx, *offdiagIdx;
2752   Vec            diagV, offdiagV;
2753   PetscScalar   *a, *diagA, *offdiagA;
2754   PetscInt       r;
2755   PetscErrorCode ierr;
2756 
2757   PetscFunctionBegin;
2758   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2759   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2760   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2761   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2762   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2763   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2764   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2765   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2766   for(r = 0; r < n; ++r) {
2767     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2768       a[r]   = diagA[r];
2769       idx[r] = cstart + diagIdx[r];
2770     } else {
2771       a[r]   = offdiagA[r];
2772       idx[r] = cmap[offdiagIdx[r]];
2773     }
2774   }
2775   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2776   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2777   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2778   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2779   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2780   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2781   PetscFunctionReturn(0);
2782 }
2783 
2784 #undef __FUNCT__
2785 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2786 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2787 {
2788   PetscErrorCode ierr;
2789   Mat            *dummy;
2790 
2791   PetscFunctionBegin;
2792   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2793   *newmat = *dummy;
2794   ierr = PetscFree(dummy);CHKERRQ(ierr);
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2799 /* -------------------------------------------------------------------*/
2800 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2801        MatGetRow_MPIAIJ,
2802        MatRestoreRow_MPIAIJ,
2803        MatMult_MPIAIJ,
2804 /* 4*/ MatMultAdd_MPIAIJ,
2805        MatMultTranspose_MPIAIJ,
2806        MatMultTransposeAdd_MPIAIJ,
2807 #ifdef PETSC_HAVE_PBGL
2808        MatSolve_MPIAIJ,
2809 #else
2810        0,
2811 #endif
2812        0,
2813        0,
2814 /*10*/ 0,
2815        0,
2816        0,
2817        MatSOR_MPIAIJ,
2818        MatTranspose_MPIAIJ,
2819 /*15*/ MatGetInfo_MPIAIJ,
2820        MatEqual_MPIAIJ,
2821        MatGetDiagonal_MPIAIJ,
2822        MatDiagonalScale_MPIAIJ,
2823        MatNorm_MPIAIJ,
2824 /*20*/ MatAssemblyBegin_MPIAIJ,
2825        MatAssemblyEnd_MPIAIJ,
2826        MatSetOption_MPIAIJ,
2827        MatZeroEntries_MPIAIJ,
2828 /*24*/ MatZeroRows_MPIAIJ,
2829        0,
2830 #ifdef PETSC_HAVE_PBGL
2831        0,
2832 #else
2833        0,
2834 #endif
2835        0,
2836        0,
2837 /*29*/ MatSetUpPreallocation_MPIAIJ,
2838 #ifdef PETSC_HAVE_PBGL
2839        0,
2840 #else
2841        0,
2842 #endif
2843        0,
2844        0,
2845        0,
2846 /*34*/ MatDuplicate_MPIAIJ,
2847        0,
2848        0,
2849        0,
2850        0,
2851 /*39*/ MatAXPY_MPIAIJ,
2852        MatGetSubMatrices_MPIAIJ,
2853        MatIncreaseOverlap_MPIAIJ,
2854        MatGetValues_MPIAIJ,
2855        MatCopy_MPIAIJ,
2856 /*44*/ MatGetRowMax_MPIAIJ,
2857        MatScale_MPIAIJ,
2858        0,
2859        0,
2860        0,
2861 /*49*/ MatSetBlockSize_MPIAIJ,
2862        0,
2863        0,
2864        0,
2865        0,
2866 /*54*/ MatFDColoringCreate_MPIAIJ,
2867        0,
2868        MatSetUnfactored_MPIAIJ,
2869        MatPermute_MPIAIJ,
2870        0,
2871 /*59*/ MatGetSubMatrix_MPIAIJ,
2872        MatDestroy_MPIAIJ,
2873        MatView_MPIAIJ,
2874        0,
2875        0,
2876 /*64*/ 0,
2877        0,
2878        0,
2879        0,
2880        0,
2881 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2882        MatGetRowMinAbs_MPIAIJ,
2883        0,
2884        MatSetColoring_MPIAIJ,
2885 #if defined(PETSC_HAVE_ADIC)
2886        MatSetValuesAdic_MPIAIJ,
2887 #else
2888        0,
2889 #endif
2890        MatSetValuesAdifor_MPIAIJ,
2891 /*75*/ MatFDColoringApply_AIJ,
2892        0,
2893        0,
2894        0,
2895        0,
2896 /*80*/ 0,
2897        0,
2898        0,
2899 /*83*/ MatLoad_MPIAIJ,
2900        0,
2901        0,
2902        0,
2903        0,
2904        0,
2905 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2906        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2907        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2908        MatPtAP_Basic,
2909        MatPtAPSymbolic_MPIAIJ,
2910 /*94*/ MatPtAPNumeric_MPIAIJ,
2911        0,
2912        0,
2913        0,
2914        0,
2915 /*99*/ 0,
2916        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2917        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2918        MatConjugate_MPIAIJ,
2919        0,
2920 /*104*/MatSetValuesRow_MPIAIJ,
2921        MatRealPart_MPIAIJ,
2922        MatImaginaryPart_MPIAIJ,
2923        0,
2924        0,
2925 /*109*/0,
2926        MatGetRedundantMatrix_MPIAIJ,
2927        MatGetRowMin_MPIAIJ,
2928        0,
2929        0,
2930 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2931        0,
2932        0,
2933        0,
2934        0,
2935 /*119*/0,
2936        0,
2937        0,
2938        0,
2939        MatGetMultiProcBlock_MPIAIJ
2940 };
2941 
2942 /* ----------------------------------------------------------------------------------------*/
2943 
2944 EXTERN_C_BEGIN
2945 #undef __FUNCT__
2946 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2947 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2948 {
2949   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2950   PetscErrorCode ierr;
2951 
2952   PetscFunctionBegin;
2953   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2954   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2955   PetscFunctionReturn(0);
2956 }
2957 EXTERN_C_END
2958 
2959 EXTERN_C_BEGIN
2960 #undef __FUNCT__
2961 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2962 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2963 {
2964   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2965   PetscErrorCode ierr;
2966 
2967   PetscFunctionBegin;
2968   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2969   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 EXTERN_C_END
2973 
2974 EXTERN_C_BEGIN
2975 #undef __FUNCT__
2976 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2977 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2978 {
2979   Mat_MPIAIJ     *b;
2980   PetscErrorCode ierr;
2981   PetscInt       i;
2982 
2983   PetscFunctionBegin;
2984   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2985   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2986   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2987   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2988 
2989   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2990   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2991   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2992   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2993   if (d_nnz) {
2994     for (i=0; i<B->rmap->n; i++) {
2995       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2996     }
2997   }
2998   if (o_nnz) {
2999     for (i=0; i<B->rmap->n; i++) {
3000       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3001     }
3002   }
3003   b = (Mat_MPIAIJ*)B->data;
3004 
3005   if (!B->preallocated) {
3006     /* Explicitly create 2 MATSEQAIJ matrices. */
3007     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3008     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3009     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3010     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3011     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3012     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3013     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3014     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3015   }
3016 
3017   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3018   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3019   B->preallocated = PETSC_TRUE;
3020   PetscFunctionReturn(0);
3021 }
3022 EXTERN_C_END
3023 
3024 #undef __FUNCT__
3025 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3026 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3027 {
3028   Mat            mat;
3029   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3030   PetscErrorCode ierr;
3031 
3032   PetscFunctionBegin;
3033   *newmat       = 0;
3034   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3035   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3036   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3037   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3038   a    = (Mat_MPIAIJ*)mat->data;
3039 
3040   mat->factortype    = matin->factortype;
3041   mat->rmap->bs      = matin->rmap->bs;
3042   mat->assembled    = PETSC_TRUE;
3043   mat->insertmode   = NOT_SET_VALUES;
3044   mat->preallocated = PETSC_TRUE;
3045 
3046   a->size           = oldmat->size;
3047   a->rank           = oldmat->rank;
3048   a->donotstash     = oldmat->donotstash;
3049   a->roworiented    = oldmat->roworiented;
3050   a->rowindices     = 0;
3051   a->rowvalues      = 0;
3052   a->getrowactive   = PETSC_FALSE;
3053 
3054   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3055   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3056 
3057   if (oldmat->colmap) {
3058 #if defined (PETSC_USE_CTABLE)
3059     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3060 #else
3061     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3062     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3063     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3064 #endif
3065   } else a->colmap = 0;
3066   if (oldmat->garray) {
3067     PetscInt len;
3068     len  = oldmat->B->cmap->n;
3069     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3070     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3071     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3072   } else a->garray = 0;
3073 
3074   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3075   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3076   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3077   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3078   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3079   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3080   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3081   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3082   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3083   *newmat = mat;
3084   PetscFunctionReturn(0);
3085 }
3086 
3087 /*
3088     Allows sending/receiving messages larger than 2 gigabytes in a single call
3089 */
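/*
   A typical pairing of these helpers, as used by MatLoad_MPIAIJ() below, looks like the
   following sketch (the buffer, count, peer ranks, and tag are illustrative only):

       if (!rank) {
         ierr = MPILong_Send(vals,nz,MPIU_SCALAR,1,tag,comm);CHKERRQ(ierr);
       } else if (rank == 1) {
         ierr = MPILong_Recv(vals,nz,MPIU_SCALAR,0,tag,comm);CHKERRQ(ierr);
       }
*/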
3090 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3091 {
3092   int             ierr;
3093   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3094   PetscInt        i,numchunks;
3095   PetscMPIInt     icnt;
3096 
3097   numchunks = cnt/CHUNKSIZE + 1;
3098   for (i=0; i<numchunks; i++) {
3099     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3100     ierr = MPI_Send(mess,icnt,type,to,tag,comm);
3101     if (type == MPIU_INT) {
3102       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3103     } else if (type == MPIU_SCALAR) {
3104       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3105     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3106   }
3107   return 0;
3108 }
3109 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3110 {
3111   int             ierr;
3112   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3113   MPI_Status      status;
3114   PetscInt        i,numchunks;
3115   PetscMPIInt     icnt;
3116 
3117   numchunks = cnt/CHUNKSIZE + 1;
3118   for (i=0; i<numchunks; i++) {
3119     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3120     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);
3121     if (type == MPIU_INT) {
3122       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3123     } else if (type == MPIU_SCALAR) {
3124       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3125     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3126   }
3127   return 0;
3128 }
3129 
3130 #undef __FUNCT__
3131 #define __FUNCT__ "MatLoad_MPIAIJ"
3132 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3133 {
3134   PetscScalar    *vals,*svals;
3135   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3136   PetscErrorCode ierr;
3137   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3138   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3139   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3140   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3141   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3142   int            fd;
3143 
3144   PetscFunctionBegin;
3145   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3146   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3147   if (!rank) {
3148     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3149     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3150     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3151   }
3152 
3153   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3154 
3155   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3156   M = header[1]; N = header[2];
3157   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3158   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3159   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3160 
3161   /* If global sizes are set, check if they are consistent with that given in the file */
3162   if (sizesset) {
3163     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3164   }
3165   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3166   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3167 
3168   /* determine ownership of all rows */
3169   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3170   else m = newMat->rmap->n; /* Set by user */
3171 
3172   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3173   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3174 
3175   /* First process needs enough room for process with most rows */
3176   if (!rank) {
3177     mmax       = rowners[1];
3178     for (i=2; i<=size; i++) {
3179       mmax = PetscMax(mmax,rowners[i]);
3180     }
3181   } else mmax = m;
3182 
3183   rowners[0] = 0;
3184   for (i=2; i<=size; i++) {
3185     rowners[i] += rowners[i-1];
3186   }
3187   rstart = rowners[rank];
3188   rend   = rowners[rank+1];
3189 
3190   /* distribute row lengths to all processors */
3191   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3192   if (!rank) {
3193     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3194     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3195     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3196     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3197     for (j=0; j<m; j++) {
3198       procsnz[0] += ourlens[j];
3199     }
3200     for (i=1; i<size; i++) {
3201       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3202       /* calculate the number of nonzeros on each processor */
3203       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3204         procsnz[i] += rowlengths[j];
3205       }
3206       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3207     }
3208     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3209   } else {
3210     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3211   }
3212 
3213   if (!rank) {
3214     /* determine max buffer needed and allocate it */
3215     maxnz = 0;
3216     for (i=0; i<size; i++) {
3217       maxnz = PetscMax(maxnz,procsnz[i]);
3218     }
3219     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3220 
3221     /* read in my part of the matrix column indices  */
3222     nz   = procsnz[0];
3223     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3224     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3225 
3226     /* read in everyone else's rows and ship them off */
3227     for (i=1; i<size; i++) {
3228       nz     = procsnz[i];
3229       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3230       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3231     }
3232     ierr = PetscFree(cols);CHKERRQ(ierr);
3233   } else {
3234     /* determine buffer space needed for message */
3235     nz = 0;
3236     for (i=0; i<m; i++) {
3237       nz += ourlens[i];
3238     }
3239     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3240 
3241     /* receive message of column indices*/
3242     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3243   }
3244 
3245   /* determine column ownership if matrix is not square */
3246   if (N != M) {
3247     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3248     else n = newMat->cmap->n;
3249     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3250     cstart = cend - n;
3251   } else {
3252     cstart = rstart;
3253     cend   = rend;
3254     n      = cend - cstart;
3255   }
3256 
3257   /* loop over local rows, determining number of off diagonal entries */
3258   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3259   jj = 0;
3260   for (i=0; i<m; i++) {
3261     for (j=0; j<ourlens[i]; j++) {
3262       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3263       jj++;
3264     }
3265   }
3266 
3267   for (i=0; i<m; i++) {
3268     ourlens[i] -= offlens[i];
3269   }
3270   if (!sizesset) {
3271     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3272   }
3273   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3274 
3275   for (i=0; i<m; i++) {
3276     ourlens[i] += offlens[i];
3277   }
3278 
3279   if (!rank) {
3280     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3281 
3282     /* read in my part of the matrix numerical values  */
3283     nz   = procsnz[0];
3284     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3285 
3286     /* insert into matrix */
3287     jj      = rstart;
3288     smycols = mycols;
3289     svals   = vals;
3290     for (i=0; i<m; i++) {
3291       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3292       smycols += ourlens[i];
3293       svals   += ourlens[i];
3294       jj++;
3295     }
3296 
3297     /* read in other processors and ship out */
3298     for (i=1; i<size; i++) {
3299       nz     = procsnz[i];
3300       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3301       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3302     }
3303     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3304   } else {
3305     /* receive numeric values */
3306     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3307 
3308     /* receive message of values*/
3309     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3310 
3311     /* insert into matrix */
3312     jj      = rstart;
3313     smycols = mycols;
3314     svals   = vals;
3315     for (i=0; i<m; i++) {
3316       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3317       smycols += ourlens[i];
3318       svals   += ourlens[i];
3319       jj++;
3320     }
3321   }
3322   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3323   ierr = PetscFree(vals);CHKERRQ(ierr);
3324   ierr = PetscFree(mycols);CHKERRQ(ierr);
3325   ierr = PetscFree(rowners);CHKERRQ(ierr);
3326 
3327   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3329   PetscFunctionReturn(0);
3330 }
3331 
3332 #undef __FUNCT__
3333 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3334 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3335 {
3336   PetscErrorCode ierr;
3337   IS             iscol_local;
3338   PetscInt       csize;
3339 
3340   PetscFunctionBegin;
3341   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3342   if (call == MAT_REUSE_MATRIX) {
3343     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3344     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3345   } else {
3346     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3347   }
3348   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3349   if (call == MAT_INITIAL_MATRIX) {
3350     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3351     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3352   }
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 #undef __FUNCT__
3357 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3358 /*
3359     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3360   process, and then the final result by concatenating the local matrices.
3361   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3362 
3363   Note: This requires a sequential iscol with all indices.
3364 */
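/*
   Callers normally reach this routine through MatGetSubMatrix(); a minimal sketch of that
   path (the index sets, sizes, and names below are illustrative only):

       IS  isrow,iscol;
       Mat sub;
       ierr = ISCreateStride(comm,nlocalrows,firstrow,1,&isrow);CHKERRQ(ierr);
       ierr = ISCreateStride(comm,nlocalcols,firstcol,1,&iscol);CHKERRQ(ierr);
       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/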
3365 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3366 {
3367   PetscErrorCode ierr;
3368   PetscMPIInt    rank,size;
3369   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3370   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3371   Mat            *local,M,Mreuse;
3372   MatScalar      *vwork,*aa;
3373   MPI_Comm       comm = ((PetscObject)mat)->comm;
3374   Mat_SeqAIJ     *aij;
3375 
3376 
3377   PetscFunctionBegin;
3378   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3379   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3380 
3381   if (call ==  MAT_REUSE_MATRIX) {
3382     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3383     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3384     local = &Mreuse;
3385     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3386   } else {
3387     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3388     Mreuse = *local;
3389     ierr   = PetscFree(local);CHKERRQ(ierr);
3390   }
3391 
3392   /*
3393       m - number of local rows
3394       n - number of columns (same on all processors)
3395       rstart - first row in new global matrix generated
3396   */
3397   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3398   if (call == MAT_INITIAL_MATRIX) {
3399     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3400     ii  = aij->i;
3401     jj  = aij->j;
3402 
3403     /*
3404         Determine the number of non-zeros in the diagonal and off-diagonal
3405         portions of the matrix in order to do correct preallocation
3406     */
3407 
3408     /* first get start and end of "diagonal" columns */
3409     if (csize == PETSC_DECIDE) {
3410       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3411       if (mglobal == n) { /* square matrix */
3412 	nlocal = m;
3413       } else {
3414         nlocal = n/size + ((n % size) > rank);
3415       }
3416     } else {
3417       nlocal = csize;
3418     }
3419     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3420     rstart = rend - nlocal;
3421     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3422 
3423     /* next, compute all the lengths */
3424     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3425     olens = dlens + m;
3426     for (i=0; i<m; i++) {
3427       jend = ii[i+1] - ii[i];
3428       olen = 0;
3429       dlen = 0;
3430       for (j=0; j<jend; j++) {
3431         if (*jj < rstart || *jj >= rend) olen++;
3432         else dlen++;
3433         jj++;
3434       }
3435       olens[i] = olen;
3436       dlens[i] = dlen;
3437     }
3438     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3439     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3440     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3441     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3442     ierr = PetscFree(dlens);CHKERRQ(ierr);
3443   } else {
3444     PetscInt ml,nl;
3445 
3446     M = *newmat;
3447     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3448     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3449     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3450     /*
3451          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3452        rather than the slower MatSetValues().
3453     */
3454     M->was_assembled = PETSC_TRUE;
3455     M->assembled     = PETSC_FALSE;
3456   }
3457   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3458   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3459   ii  = aij->i;
3460   jj  = aij->j;
3461   aa  = aij->a;
3462   for (i=0; i<m; i++) {
3463     row   = rstart + i;
3464     nz    = ii[i+1] - ii[i];
3465     cwork = jj;     jj += nz;
3466     vwork = aa;     aa += nz;
3467     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3468   }
3469 
3470   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3471   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3472   *newmat = M;
3473 
3474   /* save submatrix used in processor for next request */
3475   if (call ==  MAT_INITIAL_MATRIX) {
3476     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3477     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3478   }
3479 
3480   PetscFunctionReturn(0);
3481 }
3482 
3483 EXTERN_C_BEGIN
3484 #undef __FUNCT__
3485 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3486 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3487 {
3488   PetscInt       m,cstart, cend,j,nnz,i,d;
3489   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3490   const PetscInt *JJ;
3491   PetscScalar    *values;
3492   PetscErrorCode ierr;
3493 
3494   PetscFunctionBegin;
3495   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3496 
3497   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3498   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3499   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3500   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3501   m      = B->rmap->n;
3502   cstart = B->cmap->rstart;
3503   cend   = B->cmap->rend;
3504   rstart = B->rmap->rstart;
3505 
3506   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3507 
3508 #if defined(PETSC_USE_DEBUG)
3509   for (i=0; i<m; i++) {
3510     nnz     = Ii[i+1]- Ii[i];
3511     JJ      = J + Ii[i];
3512     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3513     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3514     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3515   }
3516 #endif
3517 
3518   for (i=0; i<m; i++) {
3519     nnz     = Ii[i+1]- Ii[i];
3520     JJ      = J + Ii[i];
3521     nnz_max = PetscMax(nnz_max,nnz);
3522     d       = 0;
3523     for (j=0; j<nnz; j++) {
3524       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3525     }
3526     d_nnz[i] = d;
3527     o_nnz[i] = nnz - d;
3528   }
3529   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3530   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3531 
3532   if (v) values = (PetscScalar*)v;
3533   else {
3534     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3535     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3536   }
3537 
3538   for (i=0; i<m; i++) {
3539     ii   = i + rstart;
3540     nnz  = Ii[i+1]- Ii[i];
3541     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3542   }
3543   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3544   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3545 
3546   if (!v) {
3547     ierr = PetscFree(values);CHKERRQ(ierr);
3548   }
3549   PetscFunctionReturn(0);
3550 }
3551 EXTERN_C_END
3552 
3553 #undef __FUNCT__
3554 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3555 /*@
3556    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3557    (the default parallel PETSc format).
3558 
3559    Collective on MPI_Comm
3560 
3561    Input Parameters:
3562 +  B - the matrix
3563 .  i - the indices into j for the start of each local row (starts with zero)
3564 .  j - the column indices for each local row (starts with zero)
3565 -  v - optional values in the matrix
3566 
3567    Level: developer
3568 
3569    Notes:
3570        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3571      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3572      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3573 
3574        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3575 
3576        The format which is used for the sparse matrix input is equivalent to a
3577     row-major ordering, i.e., for the following matrix, the input data expected is
3578     as shown:
3579 
3580         1 0 0
3581         2 0 3     P0
3582        -------
3583         4 5 6     P1
3584 
3585      Process0 [P0]: rows_owned=[0,1]
3586         i =  {0,1,3}  [size = nrow+1  = 2+1]
3587         j =  {0,0,2}  [size = nz = 3]
3588         v =  {1,2,3}  [size = nz = 3]
3589 
3590      Process1 [P1]: rows_owned=[2]
3591         i =  {0,3}    [size = nrow+1  = 1+1]
3592         j =  {0,1,2}  [size = nz = 3]
3593         v =  {4,5,6}  [size = nz = 3]
3594 
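   A minimal per-process calling sketch for P0 of the layout above (a sketch only; the
   communicator comm and the variable names are assumptions, and the matrix has already
   had its local row count set to 2 on P0):
.vb
     Mat            B;
     PetscErrorCode ierr;
     PetscInt       ii[] = {0,1,3};
     PetscInt       jj[] = {0,0,2};
     PetscScalar    vv[] = {1,2,3};

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,ii,jj,vv);CHKERRQ(ierr);
.ve
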
3595 .keywords: matrix, aij, compressed row, sparse, parallel
3596 
3597 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3598           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3599 @*/
3600 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3601 {
3602   PetscErrorCode ierr;
3603 
3604   PetscFunctionBegin;
3605   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3606   PetscFunctionReturn(0);
3607 }
3608 
3609 #undef __FUNCT__
3610 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3611 /*@C
3612    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3613    (the default parallel PETSc format).  For good matrix assembly performance
3614    the user should preallocate the matrix storage by setting the parameters
3615    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3616    performance can be increased by more than a factor of 50.
3617 
3618    Collective on MPI_Comm
3619 
3620    Input Parameters:
3621 +  A - the matrix
3622 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3623            (same value is used for all local rows)
3624 .  d_nnz - array containing the number of nonzeros in the various rows of the
3625            DIAGONAL portion of the local submatrix (possibly different for each row)
3626            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3627            The size of this array is equal to the number of local rows, i.e 'm'.
3628            You must leave room for the diagonal entry even if it is zero.
3629 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3630            submatrix (same value is used for all local rows).
3631 -  o_nnz - array containing the number of nonzeros in the various rows of the
3632            OFF-DIAGONAL portion of the local submatrix (possibly different for
3633            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3634            structure. The size of this array is equal to the number
3635            of local rows, i.e 'm'.
3636 
3637    If the *_nnz parameter is given then the *_nz parameter is ignored
3638 
3639    The AIJ format (also called the Yale sparse matrix format or
3640    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3641    storage.  The stored row and column indices begin with zero.
3642    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3643 
3644    The parallel matrix is partitioned such that the first m0 rows belong to
3645    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3646    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3647 
3648    The DIAGONAL portion of the local submatrix of a processor can be defined
3649    as the submatrix which is obtained by extracting the part corresponding to
3650    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3651    first row that belongs to the processor, r2 is the last row belonging to
3652    this processor, and c1-c2 is the range of indices of the local part of a
3653    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3654    common case of a square matrix, the row and column ranges are the same and
3655    the DIAGONAL part is also square. The remaining portion of the local
3656    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3657 
3658    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3659 
3660    You can call MatGetInfo() to get information on how effective the preallocation was;
3661    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3662    You can also run with the option -info and look for messages with the string
3663    malloc in them to see if additional memory allocation was needed.
3664 
3665    Example usage:
3666 
3667    Consider the following 8x8 matrix with 34 non-zero values, that is
3668    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3669    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3670    as follows:
3671 
3672 .vb
3673             1  2  0  |  0  3  0  |  0  4
3674     Proc0   0  5  6  |  7  0  0  |  8  0
3675             9  0 10  | 11  0  0  | 12  0
3676     -------------------------------------
3677            13  0 14  | 15 16 17  |  0  0
3678     Proc1   0 18  0  | 19 20 21  |  0  0
3679             0  0  0  | 22 23  0  | 24  0
3680     -------------------------------------
3681     Proc2  25 26 27  |  0  0 28  | 29  0
3682            30  0  0  | 31 32 33  |  0 34
3683 .ve
3684 
3685    This can be represented as a collection of submatrices as:
3686 
3687 .vb
3688       A B C
3689       D E F
3690       G H I
3691 .ve
3692 
3693    Where the submatrices A,B,C are owned by proc0, D,E,F are
3694    owned by proc1, G,H,I are owned by proc2.
3695 
3696    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3697    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3698    The 'M','N' parameters are 8,8, and have the same values on all procs.
3699 
3700    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3701    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3702    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3703    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3704    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3705    matrix, and [DF] as another SeqAIJ matrix.
3706 
3707    When d_nz, o_nz parameters are specified, d_nz storage elements are
3708    allocated for every row of the local diagonal submatrix, and o_nz
3709    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3710    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3711    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3712    In this case, the values of d_nz,o_nz are:
3713 .vb
3714      proc0 : dnz = 2, o_nz = 2
3715      proc1 : dnz = 3, o_nz = 2
3716      proc2 : dnz = 1, o_nz = 4
3717 .ve
3718    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3719    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3720    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3721    34 values.
3722 
3723    When d_nnz, o_nnz parameters are specified, the storage is specified
3724    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3725    In the above case the values for d_nnz,o_nnz are:
3726 .vb
3727      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3728      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3729      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3730 .ve
3731    Here the space allocated is the sum of all the above values, i.e., 34, and
3732    hence pre-allocation is perfect.
3733 
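   A minimal calling sketch for proc1 above using the d_nnz/o_nnz variant (a sketch only;
   the matrix A is assumed to have already been created as MATMPIAIJ with 3 local rows):
.vb
     PetscErrorCode ierr;
     PetscInt       d_nnz[] = {3,3,2};
     PetscInt       o_nnz[] = {2,1,1};

     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
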
3734    Level: intermediate
3735 
3736 .keywords: matrix, aij, compressed row, sparse, parallel
3737 
3738 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3739           MPIAIJ, MatGetInfo()
3740 @*/
3741 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3742 {
3743   PetscErrorCode ierr;
3744 
3745   PetscFunctionBegin;
3746   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3747   PetscFunctionReturn(0);
3748 }
3749 
3750 #undef __FUNCT__
3751 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3752 /*@
3753      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3754          CSR format the local rows.
3755 
3756    Collective on MPI_Comm
3757 
3758    Input Parameters:
3759 +  comm - MPI communicator
3760 .  m - number of local rows (Cannot be PETSC_DECIDE)
3761 .  n - This value should be the same as the local size used in creating the
3762        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3763        calculated if N is given) For square matrices n is almost always m.
3764 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3765 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3766 .   i - row indices
3767 .   j - column indices
3768 -   a - matrix values
3769 
3770    Output Parameter:
3771 .   mat - the matrix
3772 
3773    Level: intermediate
3774 
3775    Notes:
3776        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3777      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3778      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3779 
3780        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3781 
3782        The format which is used for the sparse matrix input is equivalent to a
3783     row-major ordering, i.e., for the following matrix, the input data expected is
3784     as shown:
3785 
3786         1 0 0
3787         2 0 3     P0
3788        -------
3789         4 5 6     P1
3790 
3791      Process0 [P0]: rows_owned=[0,1]
3792         i =  {0,1,3}  [size = nrow+1  = 2+1]
3793         j =  {0,0,2}  [size = nz = 3]
3794         v =  {1,2,3}  [size = nz = 3]
3795 
3796      Process1 [P1]: rows_owned=[2]
3797         i =  {0,3}    [size = nrow+1  = 1+1]
3798         j =  {0,1,2}  [size = nz = 3]
3799         v =  {4,5,6}  [size = nz = 3]
3800 
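   A minimal calling sketch for process P0 of the layout above (a sketch only; the
   communicator comm and the variable names are assumptions):
.vb
     Mat            A;
     PetscErrorCode ierr;
     PetscInt       ii[] = {0,1,3};
     PetscInt       jj[] = {0,0,2};
     PetscScalar    vv[] = {1,2,3};

     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,ii,jj,vv,&A);CHKERRQ(ierr);
.ve
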
3801 .keywords: matrix, aij, compressed row, sparse, parallel
3802 
3803 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3804           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3805 @*/
3806 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3807 {
3808   PetscErrorCode ierr;
3809 
3810   PetscFunctionBegin;
3811   if (i[0]) {
3812     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3813   }
3814   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3815   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3816   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3817   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3818   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3819   PetscFunctionReturn(0);
3820 }
3821 
3822 #undef __FUNCT__
3823 #define __FUNCT__ "MatCreateMPIAIJ"
3824 /*@C
3825    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3826    (the default parallel PETSc format).  For good matrix assembly performance
3827    the user should preallocate the matrix storage by setting the parameters
3828    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3829    performance can be increased by more than a factor of 50.
3830 
3831    Collective on MPI_Comm
3832 
3833    Input Parameters:
3834 +  comm - MPI communicator
3835 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3836            This value should be the same as the local size used in creating the
3837            y vector for the matrix-vector product y = Ax.
3838 .  n - This value should be the same as the local size used in creating the
3839        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3840        calculated if N is given) For square matrices n is almost always m.
3841 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3842 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3843 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3844            (same value is used for all local rows)
3845 .  d_nnz - array containing the number of nonzeros in the various rows of the
3846            DIAGONAL portion of the local submatrix (possibly different for each row)
3847            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3848            The size of this array is equal to the number of local rows, i.e 'm'.
3849            You must leave room for the diagonal entry even if it is zero.
3850 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3851            submatrix (same value is used for all local rows).
3852 -  o_nnz - array containing the number of nonzeros in the various rows of the
3853            OFF-DIAGONAL portion of the local submatrix (possibly different for
3854            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3855            structure. The size of this array is equal to the number
3856            of local rows, i.e 'm'.
3857 
3858    Output Parameter:
3859 .  A - the matrix
3860 
3861    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3862    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3863    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3864 
3865    Notes:
3866    If the *_nnz parameter is given then the *_nz parameter is ignored
3867 
3868    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3869    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3870    storage requirements for this matrix.
3871 
3872    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3873    processor than it must be used on all processors that share the object for
3874    that argument.
3875 
3876    The user MUST specify either the local or global matrix dimensions
3877    (possibly both).
3878 
3879    The parallel matrix is partitioned across processors such that the
3880    first m0 rows belong to process 0, the next m1 rows belong to
3881    process 1, the next m2 rows belong to process 2 etc.. where
3882    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3883    values corresponding to [m x N] submatrix.
3884 
3885    The columns are logically partitioned with the n0 columns belonging
3886    to 0th partition, the next n1 columns belonging to the next
3887    partition etc., where n0,n1,n2... are the input parameter 'n'.
3888 
3889    The DIAGONAL portion of the local submatrix on any given processor
3890    is the submatrix corresponding to the rows and columns m,n
3891    owned by the given processor, i.e., the diagonal matrix on
3892    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3893    etc. The remaining portion of the local submatrix [m x (N-n)]
3894    constitutes the OFF-DIAGONAL portion. The example below better
3895    illustrates this concept.
3896 
3897    For a square global matrix we define each processor's diagonal portion
3898    to be its local rows and the corresponding columns (a square submatrix);
3899    each processor's off-diagonal portion encompasses the remainder of the
3900    local matrix (a rectangular submatrix).
3901 
3902    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3903 
3904    When calling this routine with a single process communicator, a matrix of
3905    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3906    type of communicator, use the construction mechanism:
3907      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3908 
3909    By default, this format uses inodes (identical nodes) when possible.
3910    We search for consecutive rows with the same nonzero structure, thereby
3911    reusing matrix information to achieve increased efficiency.
3912 
3913    Options Database Keys:
3914 +  -mat_no_inode  - Do not use inodes
3915 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3916 -  -mat_aij_oneindex - Internally use indexing starting at 1
3917         rather than 0.  Note that when calling MatSetValues(),
3918         the user still MUST index entries starting at 0!
3919 
3920 
3921    Example usage:
3922 
3923    Consider the following 8x8 matrix with 34 non-zero values, that is
3924    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3925    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3926    as follows:
3927 
3928 .vb
3929             1  2  0  |  0  3  0  |  0  4
3930     Proc0   0  5  6  |  7  0  0  |  8  0
3931             9  0 10  | 11  0  0  | 12  0
3932     -------------------------------------
3933            13  0 14  | 15 16 17  |  0  0
3934     Proc1   0 18  0  | 19 20 21  |  0  0
3935             0  0  0  | 22 23  0  | 24  0
3936     -------------------------------------
3937     Proc2  25 26 27  |  0  0 28  | 29  0
3938            30  0  0  | 31 32 33  |  0 34
3939 .ve
3940 
3941    This can be represented as a collection of submatrices as:
3942 
3943 .vb
3944       A B C
3945       D E F
3946       G H I
3947 .ve
3948 
3949    Where the submatrices A,B,C are owned by proc0, D,E,F are
3950    owned by proc1, G,H,I are owned by proc2.
3951 
3952    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3953    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3954    The 'M','N' parameters are 8,8, and have the same values on all procs.
3955 
3956    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3957    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3958    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3959    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3960    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3961    matrix, and [DF] as another SeqAIJ matrix.
3962 
3963    When d_nz, o_nz parameters are specified, d_nz storage elements are
3964    allocated for every row of the local diagonal submatrix, and o_nz
3965    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3966    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3967    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3968    In this case, the values of d_nz,o_nz are:
3969 .vb
3970      proc0 : d_nz = 2, o_nz = 2
3971      proc1 : d_nz = 3, o_nz = 2
3972      proc2 : d_nz = 1, o_nz = 4
3973 .ve
3974    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3975    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3976    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3977    34 values.
3978 
3979    When d_nnz, o_nnz parameters are specified, the storage is specified
3980    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3981    In the above case the values for d_nnz,o_nnz are:
3982 .vb
3983      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3984      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3985      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3986 .ve
3987    Here the space allocated is the sum of all the above values, i.e., 34, and
3988    hence the preallocation is perfect.
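
   A minimal calling sketch for the example above (the communicator and variable
   names are illustrative; each process passes its own m, n and nnz arrays):
.vb
     /* on proc0 of the example above */
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     Mat      A;
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve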
3989 
3990    Level: intermediate
3991 
3992 .keywords: matrix, aij, compressed row, sparse, parallel
3993 
3994 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3995           MPIAIJ, MatCreateMPIAIJWithArrays()
3996 @*/
3997 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3998 {
3999   PetscErrorCode ierr;
4000   PetscMPIInt    size;
4001 
4002   PetscFunctionBegin;
4003   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4004   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4005   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4006   if (size > 1) {
4007     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4008     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4009   } else {
4010     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4011     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4012   }
4013   PetscFunctionReturn(0);
4014 }
4015 
4016 #undef __FUNCT__
4017 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
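/*
    MatMPIAIJGetSeqAIJ - gives access to the two SeqAIJ matrices that make up an MPIAIJ matrix:
    Ad is the local DIAGONAL block, Ao is the local OFF-DIAGONAL block, and colmap is the map
    from the (compressed) local column indices of Ao to global column numbers.
*/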
4018 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4019 {
4020   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4021 
4022   PetscFunctionBegin;
4023   *Ad     = a->A;
4024   *Ao     = a->B;
4025   *colmap = a->garray;
4026   PetscFunctionReturn(0);
4027 }
4028 
4029 #undef __FUNCT__
4030 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4031 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4032 {
4033   PetscErrorCode ierr;
4034   PetscInt       i;
4035   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4036 
4037   PetscFunctionBegin;
4038   if (coloring->ctype == IS_COLORING_GLOBAL) {
4039     ISColoringValue *allcolors,*colors;
4040     ISColoring      ocoloring;
4041 
4042     /* set coloring for diagonal portion */
4043     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4044 
4045     /* set coloring for off-diagonal portion */
4046     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4047     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4048     for (i=0; i<a->B->cmap->n; i++) {
4049       colors[i] = allcolors[a->garray[i]];
4050     }
4051     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4052     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4053     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4054     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4055   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4056     ISColoringValue *colors;
4057     PetscInt        *larray;
4058     ISColoring      ocoloring;
4059 
4060     /* set coloring for diagonal portion */
4061     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4062     for (i=0; i<a->A->cmap->n; i++) {
4063       larray[i] = i + A->cmap->rstart;
4064     }
4065     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4066     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4067     for (i=0; i<a->A->cmap->n; i++) {
4068       colors[i] = coloring->colors[larray[i]];
4069     }
4070     ierr = PetscFree(larray);CHKERRQ(ierr);
4071     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4072     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4073     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4074 
4075     /* set coloring for off-diagonal portion */
4076     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4077     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4078     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4079     for (i=0; i<a->B->cmap->n; i++) {
4080       colors[i] = coloring->colors[larray[i]];
4081     }
4082     ierr = PetscFree(larray);CHKERRQ(ierr);
4083     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4084     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4085     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4086   } else {
4087     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4088   }
4089 
4090   PetscFunctionReturn(0);
4091 }
4092 
4093 #if defined(PETSC_HAVE_ADIC)
4094 #undef __FUNCT__
4095 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4096 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4097 {
4098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4099   PetscErrorCode ierr;
4100 
4101   PetscFunctionBegin;
4102   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4103   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4104   PetscFunctionReturn(0);
4105 }
4106 #endif
4107 
4108 #undef __FUNCT__
4109 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4110 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4111 {
4112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4113   PetscErrorCode ierr;
4114 
4115   PetscFunctionBegin;
4116   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4117   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4118   PetscFunctionReturn(0);
4119 }
4120 
4121 #undef __FUNCT__
4122 #define __FUNCT__ "MatMerge"
4123 /*@
4124       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4125                  matrices from each processor
4126 
4127     Collective on MPI_Comm
4128 
4129    Input Parameters:
4130 +    comm - the communicator the parallel matrix will live on
4131 .    inmat - the input sequential matrix (one per process)
4132 .    n - number of local columns (or PETSC_DECIDE)
4133 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4134 
4135    Output Parameter:
4136 .    outmat - the parallel matrix generated
4137 
4138     Level: advanced
4139 
4140    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4141 
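   A minimal calling sketch (the variable names are illustrative); note that this
   routine calls MatDestroy() on inmat:
.vb
     /* each process builds a SeqAIJ matrix inmat holding its rows of the global matrix */
     ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve
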
4142 @*/
4143 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4144 {
4145   PetscErrorCode ierr;
4146   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4147   PetscInt       *indx;
4148   PetscScalar    *values;
4149 
4150   PetscFunctionBegin;
4151   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4152   if (scall == MAT_INITIAL_MATRIX){
4153     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4154     if (n == PETSC_DECIDE){
4155       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4156     }
4157     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4158     rstart -= m;
4159 
4160     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4161     for (i=0;i<m;i++) {
4162       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4163       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4164       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4165     }
4166     /* This routine will ONLY return MPIAIJ type matrix */
4167     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4168     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4169     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4170     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4171     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4172 
4173   } else if (scall == MAT_REUSE_MATRIX){
4174     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4175   } else {
4176     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4177   }
4178 
4179   for (i=0;i<m;i++) {
4180     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4181     Ii    = i + rstart;
4182     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4183     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4184   }
4185   ierr = MatDestroy(inmat);CHKERRQ(ierr);
4186   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4187   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4188 
4189   PetscFunctionReturn(0);
4190 }
4191 
4192 #undef __FUNCT__
4193 #define __FUNCT__ "MatFileSplit"
4194 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4195 {
4196   PetscErrorCode    ierr;
4197   PetscMPIInt       rank;
4198   PetscInt          m,N,i,rstart,nnz;
4199   size_t            len;
4200   const PetscInt    *indx;
4201   PetscViewer       out;
4202   char              *name;
4203   Mat               B;
4204   const PetscScalar *values;
4205 
4206   PetscFunctionBegin;
4207   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4208   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4209   /* Should this be the type of the diagonal block of A? */
4210   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4211   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4212   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4213   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4214   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4215   for (i=0;i<m;i++) {
4216     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4217     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4218     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4219   }
4220   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4221   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4222 
4223   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4224   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4225   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4226   sprintf(name,"%s.%d",outfile,rank);
4227   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4228   ierr = PetscFree(name);CHKERRQ(ierr);
4229   ierr = MatView(B,out);CHKERRQ(ierr);
4230   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4231   ierr = MatDestroy(B);CHKERRQ(ierr);
4232   PetscFunctionReturn(0);
4233 }
4234 
4235 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4236 #undef __FUNCT__
4237 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4238 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4239 {
4240   PetscErrorCode       ierr;
4241   Mat_Merge_SeqsToMPI  *merge;
4242   PetscContainer       container;
4243 
4244   PetscFunctionBegin;
4245   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4246   if (container) {
4247     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4248     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4249     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4250     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4251     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4252     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4253     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4254     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4255     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4256     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4257     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4258     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4259     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4260     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4261 
4262     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4263     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4264   }
4265   ierr = PetscFree(merge);CHKERRQ(ierr);
4266 
4267   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4268   PetscFunctionReturn(0);
4269 }
4270 
4271 #include "../src/mat/utils/freespace.h"
4272 #include "petscbt.h"
4273 
4274 #undef __FUNCT__
4275 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4276 /*@C
4277       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4278                  matrices from each processor
4279 
4280     Collective on MPI_Comm
4281 
4282    Input Parameters:
4283 +    comm - the communicator the parallel matrix will live on
4284 .    seqmat - the input sequential matrix (one per process)
4285 .    m - number of local rows (or PETSC_DECIDE)
4286 .    n - number of local columns (or PETSC_DECIDE)
4287 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4288 
4289    Output Parameter:
4290 .    mpimat - the parallel matrix generated
4291 
4292     Level: advanced
4293 
4294    Notes:
4295      The dimensions of the sequential matrix in each processor MUST be the same.
4296      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4297      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
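
     A minimal calling sketch (the variable names are illustrative); with
     MAT_REUSE_MATRIX only the numerical values are recomputed and the symbolic
     structure from the first call is reused:
.vb
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... update the numerical values of seqmat, keeping its nonzero pattern ... */
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve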
4298 @*/
4299 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4300 {
4301   PetscErrorCode       ierr;
4302   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4303   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4304   PetscMPIInt          size,rank,taga,*len_s;
4305   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4306   PetscInt             proc,m;
4307   PetscInt             **buf_ri,**buf_rj;
4308   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4309   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4310   MPI_Request          *s_waits,*r_waits;
4311   MPI_Status           *status;
4312   MatScalar            *aa=a->a;
4313   MatScalar            **abuf_r,*ba_i;
4314   Mat_Merge_SeqsToMPI  *merge;
4315   PetscContainer       container;
4316 
4317   PetscFunctionBegin;
4318   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4319 
4320   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4321   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4322 
4323   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4324   if (container) {
4325     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4326   }
4327   bi     = merge->bi;
4328   bj     = merge->bj;
4329   buf_ri = merge->buf_ri;
4330   buf_rj = merge->buf_rj;
4331 
4332   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4333   owners = merge->rowmap->range;
4334   len_s  = merge->len_s;
4335 
4336   /* send and recv matrix values */
4337   /*-----------------------------*/
4338   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4339   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4340 
4341   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4342   for (proc=0,k=0; proc<size; proc++){
4343     if (!len_s[proc]) continue;
4344     i = owners[proc];
4345     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4346     k++;
4347   }
4348 
4349   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4350   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4351   ierr = PetscFree(status);CHKERRQ(ierr);
4352 
4353   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4354   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4355 
4356   /* insert mat values of mpimat */
4357   /*----------------------------*/
4358   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4359   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4360 
4361   for (k=0; k<merge->nrecv; k++){
4362     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4363     nrows = *(buf_ri_k[k]);
4364     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4365     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4366   }
4367 
4368   /* set values of ba */
4369   m = merge->rowmap->n;
4370   for (i=0; i<m; i++) {
4371     arow = owners[rank] + i;
4372     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4373     bnzi = bi[i+1] - bi[i];
4374     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4375 
4376     /* add local non-zero vals of this proc's seqmat into ba */
4377     anzi = ai[arow+1] - ai[arow];
4378     aj   = a->j + ai[arow];
4379     aa   = a->a + ai[arow];
4380     nextaj = 0;
4381     for (j=0; nextaj<anzi; j++){
4382       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4383         ba_i[j] += aa[nextaj++];
4384       }
4385     }
4386 
4387     /* add received vals into ba */
4388     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4389       /* i-th row */
4390       if (i == *nextrow[k]) {
4391         anzi = *(nextai[k]+1) - *nextai[k];
4392         aj   = buf_rj[k] + *(nextai[k]);
4393         aa   = abuf_r[k] + *(nextai[k]);
4394         nextaj = 0;
4395         for (j=0; nextaj<anzi; j++){
4396           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4397             ba_i[j] += aa[nextaj++];
4398           }
4399         }
4400         nextrow[k]++; nextai[k]++;
4401       }
4402     }
4403     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4404   }
4405   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4406   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4407 
4408   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4409   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4410   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4411   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4412   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4413   PetscFunctionReturn(0);
4414 }
4415 
4416 #undef __FUNCT__
4417 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4418 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4419 {
4420   PetscErrorCode       ierr;
4421   Mat                  B_mpi;
4422   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4423   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4424   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4425   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4426   PetscInt             len,proc,*dnz,*onz;
4427   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4428   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4429   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4430   MPI_Status           *status;
4431   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4432   PetscBT              lnkbt;
4433   Mat_Merge_SeqsToMPI  *merge;
4434   PetscContainer       container;
4435 
4436   PetscFunctionBegin;
4437   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4438 
4439   /* make sure it is a PETSc comm */
4440   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4441   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4442   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4443 
4444   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4445   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4446 
4447   /* determine row ownership */
4448   /*---------------------------------------------------------*/
4449   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4450   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4451   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4452   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4453   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4454   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4455   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4456 
4457   m      = merge->rowmap->n;
4458   M      = merge->rowmap->N;
4459   owners = merge->rowmap->range;
4460 
4461   /* determine the number of messages to send, their lengths */
4462   /*---------------------------------------------------------*/
4463   len_s  = merge->len_s;
4464 
4465   len = 0;  /* length of buf_si[] */
4466   merge->nsend = 0;
4467   for (proc=0; proc<size; proc++){
4468     len_si[proc] = 0;
4469     if (proc == rank){
4470       len_s[proc] = 0;
4471     } else {
4472       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4473       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4474     }
4475     if (len_s[proc]) {
4476       merge->nsend++;
4477       nrows = 0;
4478       for (i=owners[proc]; i<owners[proc+1]; i++){
4479         if (ai[i+1] > ai[i]) nrows++;
4480       }
4481       len_si[proc] = 2*(nrows+1);
4482       len += len_si[proc];
4483     }
4484   }
4485 
4486   /* determine the number and length of messages to receive for ij-structure */
4487   /*-------------------------------------------------------------------------*/
4488   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4489   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4490 
4491   /* post the Irecv of j-structure */
4492   /*-------------------------------*/
4493   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4494   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4495 
4496   /* post the Isend of j-structure */
4497   /*--------------------------------*/
4498   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4499 
4500   for (proc=0, k=0; proc<size; proc++){
4501     if (!len_s[proc]) continue;
4502     i = owners[proc];
4503     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4504     k++;
4505   }
4506 
4507   /* receives and sends of j-structure are complete */
4508   /*------------------------------------------------*/
4509   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4510   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4511 
4512   /* send and recv i-structure */
4513   /*---------------------------*/
4514   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4515   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4516 
4517   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4518   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4519   for (proc=0,k=0; proc<size; proc++){
4520     if (!len_s[proc]) continue;
4521     /* form outgoing message for i-structure:
4522          buf_si[0]:                 nrows to be sent
4523                [1:nrows]:           row index (global)
4524                [nrows+1:2*nrows+1]: i-structure index
4525     */
4526     /*-------------------------------------------*/
4527     nrows = len_si[proc]/2 - 1;
4528     buf_si_i    = buf_si + nrows+1;
4529     buf_si[0]   = nrows;
4530     buf_si_i[0] = 0;
4531     nrows = 0;
4532     for (i=owners[proc]; i<owners[proc+1]; i++){
4533       anzi = ai[i+1] - ai[i];
4534       if (anzi) {
4535         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4536         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4537         nrows++;
4538       }
4539     }
4540     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4541     k++;
4542     buf_si += len_si[proc];
4543   }
4544 
4545   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4546   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4547 
4548   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4549   for (i=0; i<merge->nrecv; i++){
4550     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4551   }
4552 
4553   ierr = PetscFree(len_si);CHKERRQ(ierr);
4554   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4555   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4556   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4557   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4558   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4559   ierr = PetscFree(status);CHKERRQ(ierr);
4560 
4561   /* compute a local seq matrix in each processor */
4562   /*----------------------------------------------*/
4563   /* allocate bi array and free space for accumulating nonzero column info */
4564   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4565   bi[0] = 0;
4566 
4567   /* create and initialize a linked list */
4568   nlnk = N+1;
4569   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4570 
4571   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4572   len = 0;
4573   len  = ai[owners[rank+1]] - ai[owners[rank]];
4574   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4575   current_space = free_space;
4576 
4577   /* determine symbolic info for each local row */
4578   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4579 
4580   for (k=0; k<merge->nrecv; k++){
4581     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4582     nrows = *buf_ri_k[k];
4583     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4584     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4585   }
4586 
4587   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4588   len = 0;
4589   for (i=0;i<m;i++) {
4590     bnzi   = 0;
4591     /* add local non-zero cols of this proc's seqmat into lnk */
4592     arow   = owners[rank] + i;
4593     anzi   = ai[arow+1] - ai[arow];
4594     aj     = a->j + ai[arow];
4595     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4596     bnzi += nlnk;
4597     /* add received col data into lnk */
4598     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4599       if (i == *nextrow[k]) { /* i-th row */
4600         anzi = *(nextai[k]+1) - *nextai[k];
4601         aj   = buf_rj[k] + *nextai[k];
4602         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4603         bnzi += nlnk;
4604         nextrow[k]++; nextai[k]++;
4605       }
4606     }
4607     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4608 
4609     /* if free space is not available, make more free space */
4610     if (current_space->local_remaining<bnzi) {
4611       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4612       nspacedouble++;
4613     }
4614     /* copy data into free space, then initialize lnk */
4615     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4616     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4617 
4618     current_space->array           += bnzi;
4619     current_space->local_used      += bnzi;
4620     current_space->local_remaining -= bnzi;
4621 
4622     bi[i+1] = bi[i] + bnzi;
4623   }
4624 
4625   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4626 
4627   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4628   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4629   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4630 
4631   /* create symbolic parallel matrix B_mpi */
4632   /*---------------------------------------*/
4633   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4634   if (n==PETSC_DECIDE) {
4635     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4636   } else {
4637     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4638   }
4639   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4640   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4641   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4642 
4643   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4644   B_mpi->assembled     = PETSC_FALSE;
4645   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4646   merge->bi            = bi;
4647   merge->bj            = bj;
4648   merge->buf_ri        = buf_ri;
4649   merge->buf_rj        = buf_rj;
4650   merge->coi           = PETSC_NULL;
4651   merge->coj           = PETSC_NULL;
4652   merge->owners_co     = PETSC_NULL;
4653 
4654   /* attach the supporting struct to B_mpi for reuse */
4655   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4656   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4657   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4658   *mpimat = B_mpi;
4659 
4660   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4661   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4662   PetscFunctionReturn(0);
4663 }
4664 
4665 #undef __FUNCT__
4666 #define __FUNCT__ "MatMerge_SeqsToMPI"
4667 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4668 {
4669   PetscErrorCode   ierr;
4670 
4671   PetscFunctionBegin;
4672   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4673   if (scall == MAT_INITIAL_MATRIX){
4674     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4675   }
4676   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4677   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4678   PetscFunctionReturn(0);
4679 }
4680 
4681 #undef __FUNCT__
4682 #define __FUNCT__ "MatGetLocalMat"
4683 /*@
4684      MatGetLocalMat - Creates a SeqAIJ matrix from all the local rows of A, using global column numbering
4685 
4686     Not Collective
4687 
4688    Input Parameters:
4689 +    A - the matrix
4690 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4691 
4692    Output Parameter:
4693 .    A_loc - the local sequential matrix generated
4694 
4695     Level: developer
4696 
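   A minimal calling sketch (the variable names are illustrative):
.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... after the numerical values of A change (same nonzero pattern) ... */
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve
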
4697 @*/
4698 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4699 {
4700   PetscErrorCode  ierr;
4701   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4702   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4703   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4704   MatScalar       *aa=a->a,*ba=b->a,*cam;
4705   PetscScalar     *ca;
4706   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4707   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4708 
4709   PetscFunctionBegin;
4710   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4711   if (scall == MAT_INITIAL_MATRIX){
4712     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4713     ci[0] = 0;
4714     for (i=0; i<am; i++){
4715       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4716     }
4717     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4718     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4719     k = 0;
4720     for (i=0; i<am; i++) {
4721       ncols_o = bi[i+1] - bi[i];
4722       ncols_d = ai[i+1] - ai[i];
4723       /* off-diagonal portion of A */
4724       for (jo=0; jo<ncols_o; jo++) {
4725         col = cmap[*bj];
4726         if (col >= cstart) break;
4727         cj[k]   = col; bj++;
4728         ca[k++] = *ba++;
4729       }
4730       /* diagonal portion of A */
4731       for (j=0; j<ncols_d; j++) {
4732         cj[k]   = cstart + *aj++;
4733         ca[k++] = *aa++;
4734       }
4735       /* off-diagonal portion of A */
4736       for (j=jo; j<ncols_o; j++) {
4737         cj[k]   = cmap[*bj++];
4738         ca[k++] = *ba++;
4739       }
4740     }
4741     /* put together the new matrix */
4742     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4743     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4744     /* Since these are PETSc arrays, change flags to free them as necessary. */
4745     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4746     mat->free_a  = PETSC_TRUE;
4747     mat->free_ij = PETSC_TRUE;
4748     mat->nonew   = 0;
4749   } else if (scall == MAT_REUSE_MATRIX){
4750     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4751     ci = mat->i; cj = mat->j; cam = mat->a;
4752     for (i=0; i<am; i++) {
4753       /* off-diagonal portion of A */
4754       ncols_o = bi[i+1] - bi[i];
4755       for (jo=0; jo<ncols_o; jo++) {
4756         col = cmap[*bj];
4757         if (col >= cstart) break;
4758         *cam++ = *ba++; bj++;
4759       }
4760       /* diagonal portion of A */
4761       ncols_d = ai[i+1] - ai[i];
4762       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4763       /* off-diagonal portion of A */
4764       for (j=jo; j<ncols_o; j++) {
4765         *cam++ = *ba++; bj++;
4766       }
4767     }
4768   } else {
4769     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4770   }
4771 
4772   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4773   PetscFunctionReturn(0);
4774 }
4775 
4776 #undef __FUNCT__
4777 #define __FUNCT__ "MatGetLocalMatCondensed"
4778 /*@C
4779      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and NON-ZERO columns of A
4780 
4781     Not Collective
4782 
4783    Input Parameters:
4784 +    A - the matrix
4785 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4786 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4787 
4788    Output Parameter:
4789 .    A_loc - the local sequential matrix generated
4790 
4791     Level: developer
4792 
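   A minimal calling sketch (the variable names are illustrative); passing PETSC_NULL
   for row and col lets the routine build the index sets itself:
.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
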
4793 @*/
4794 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4795 {
4796   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4797   PetscErrorCode    ierr;
4798   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4799   IS                isrowa,iscola;
4800   Mat               *aloc;
4801 
4802   PetscFunctionBegin;
4803   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4804   if (!row){
4805     start = A->rmap->rstart; end = A->rmap->rend;
4806     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4807   } else {
4808     isrowa = *row;
4809   }
4810   if (!col){
4811     start = A->cmap->rstart;
4812     cmap  = a->garray;
4813     nzA   = a->A->cmap->n;
4814     nzB   = a->B->cmap->n;
4815     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4816     ncols = 0;
4817     for (i=0; i<nzB; i++) {
4818       if (cmap[i] < start) idx[ncols++] = cmap[i];
4819       else break;
4820     }
4821     imark = i;
4822     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4823     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4824     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4825   } else {
4826     iscola = *col;
4827   }
4828   if (scall != MAT_INITIAL_MATRIX){
4829     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4830     aloc[0] = *A_loc;
4831   }
4832   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4833   *A_loc = aloc[0];
4834   ierr = PetscFree(aloc);CHKERRQ(ierr);
4835   if (!row){
4836     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4837   }
4838   if (!col){
4839     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4840   }
4841   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4842   PetscFunctionReturn(0);
4843 }
4844 
4845 #undef __FUNCT__
4846 #define __FUNCT__ "MatGetBrowsOfAcols"
4847 /*@C
4848     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4849 
4850     Collective on Mat
4851 
4852    Input Parameters:
4853 +    A,B - the matrices in mpiaij format
4854 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4855 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4856 
4857    Output Parameter:
4858 +    rowb, colb - index sets of rows and columns of B to extract
4859 .    brstart - row index in B_seq at which the next B->rmap->n rows come from B's own local rows
4860 -    B_seq - the sequential matrix generated
4861 
4862     Level: developer
4863 
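   A minimal calling sketch (the variable names are illustrative):
.vb
     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve
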
4864 @*/
4865 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4866 {
4867   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4868   PetscErrorCode    ierr;
4869   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4870   IS                isrowb,iscolb;
4871   Mat               *bseq;
4872 
4873   PetscFunctionBegin;
4874   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4875     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4876   }
4877   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4878 
4879   if (scall == MAT_INITIAL_MATRIX){
4880     start = A->cmap->rstart;
4881     cmap  = a->garray;
4882     nzA   = a->A->cmap->n;
4883     nzB   = a->B->cmap->n;
4884     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4885     ncols = 0;
4886     for (i=0; i<nzB; i++) {  /* row < local row index */
4887       if (cmap[i] < start) idx[ncols++] = cmap[i];
4888       else break;
4889     }
4890     imark = i;
4891     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4892     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4893     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4894     *brstart = imark;
4895     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4896   } else {
4897     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4898     isrowb = *rowb; iscolb = *colb;
4899     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4900     bseq[0] = *B_seq;
4901   }
4902   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4903   *B_seq = bseq[0];
4904   ierr = PetscFree(bseq);CHKERRQ(ierr);
4905   if (!rowb){
4906     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4907   } else {
4908     *rowb = isrowb;
4909   }
4910   if (!colb){
4911     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4912   } else {
4913     *colb = iscolb;
4914   }
4915   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4916   PetscFunctionReturn(0);
4917 }
4918 
4919 #undef __FUNCT__
4920 #define __FUNCT__ "MatGetBrowsOfAoCols"
4921 /*@C
4922     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4923     of the OFF-DIAGONAL portion of local A
4924 
4925     Collective on Mat
4926 
4927    Input Parameters:
4928 +    A,B - the matrices in mpiaij format
4929 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4930 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4931 .    startsj_r - similar to startsj for receives
4932 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4933 
4934    Output Parameter:
4935 .    B_oth - the sequential matrix generated
4936 
4937     Level: developer
4938 
4939 @*/
4940 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4941 {
4942   VecScatter_MPI_General *gen_to,*gen_from;
4943   PetscErrorCode         ierr;
4944   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4945   Mat_SeqAIJ             *b_oth;
4946   VecScatter             ctx=a->Mvctx;
4947   MPI_Comm               comm=((PetscObject)ctx)->comm;
4948   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4949   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4950   PetscScalar            *rvalues,*svalues;
4951   MatScalar              *b_otha,*bufa,*bufA;
4952   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4953   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4954   MPI_Status             *sstatus,rstatus;
4955   PetscMPIInt            jj;
4956   PetscInt               *cols,sbs,rbs;
4957   PetscScalar            *vals;
4958 
4959   PetscFunctionBegin;
4960   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4961     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4962   }
4963   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4964   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4965 
4966   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4967   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4968   rvalues  = gen_from->values; /* holds the length of receiving row */
4969   svalues  = gen_to->values;   /* holds the length of sending row */
4970   nrecvs   = gen_from->n;
4971   nsends   = gen_to->n;
4972 
4973   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4974   srow     = gen_to->indices;   /* local row index to be sent */
4975   sstarts  = gen_to->starts;
4976   sprocs   = gen_to->procs;
4977   sstatus  = gen_to->sstatus;
4978   sbs      = gen_to->bs;
4979   rstarts  = gen_from->starts;
4980   rprocs   = gen_from->procs;
4981   rbs      = gen_from->bs;
4982 
4983   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4984   if (scall == MAT_INITIAL_MATRIX){
4985     /* i-array */
4986     /*---------*/
4987     /*  post receives */
4988     for (i=0; i<nrecvs; i++){
4989       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4990       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4991       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4992     }
4993 
4994     /* pack the outgoing message */
4995     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4996     sstartsj[0] = 0;  rstartsj[0] = 0;
4997     len = 0; /* total length of j or a array to be sent */
4998     k = 0;
4999     for (i=0; i<nsends; i++){
5000       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5001       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5002       for (j=0; j<nrows; j++) {
5003         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5004         for (l=0; l<sbs; l++){
5005           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5006           rowlen[j*sbs+l] = ncols;
5007           len += ncols;
5008           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5009         }
5010         k++;
5011       }
5012       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5013       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5014     }
5015     /* recvs and sends of i-array are completed */
5016     i = nrecvs;
5017     while (i--) {
5018       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5019     }
5020     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5021 
5022     /* allocate buffers for sending j and a arrays */
5023     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5024     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5025 
5026     /* create i-array of B_oth */
5027     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5028     b_othi[0] = 0;
5029     len = 0; /* total length of j or a array to be received */
5030     k = 0;
5031     for (i=0; i<nrecvs; i++){
5032       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5033       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5034       for (j=0; j<nrows; j++) {
5035         b_othi[k+1] = b_othi[k] + rowlen[j];
5036         len += rowlen[j]; k++;
5037       }
5038       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5039     }
5040 
5041     /* allocate space for the j and a arrays of B_oth */
5042     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5043     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5044 
5045     /* j-array */
5046     /*---------*/
5047     /*  post receives of j-array */
5048     for (i=0; i<nrecvs; i++){
5049       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5050       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5051     }
5052 
5053     /* pack the outgoing message j-array */
5054     k = 0;
5055     for (i=0; i<nsends; i++){
5056       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5057       bufJ = bufj+sstartsj[i];
5058       for (j=0; j<nrows; j++) {
5059         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5060         for (ll=0; ll<sbs; ll++){
5061           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5062           for (l=0; l<ncols; l++){
5063             *bufJ++ = cols[l];
5064           }
5065           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5066         }
5067       }
5068       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5069     }
5070 
5071     /* recvs and sends of j-array are completed */
5072     i = nrecvs;
5073     while (i--) {
5074       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5075     }
5076     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5077   } else if (scall == MAT_REUSE_MATRIX){
5078     sstartsj = *startsj;
5079     rstartsj = *startsj_r;
5080     bufa     = *bufa_ptr;
5081     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5082     b_otha   = b_oth->a;
5083   } else {
5084     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5085   }
5086 
5087   /* a-array */
5088   /*---------*/
5089   /*  post receives of a-array */
5090   for (i=0; i<nrecvs; i++){
5091     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5092     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5093   }
5094 
5095   /* pack the outgoing message a-array */
5096   k = 0;
5097   for (i=0; i<nsends; i++){
5098     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5099     bufA = bufa+sstartsj[i];
5100     for (j=0; j<nrows; j++) {
5101       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5102       for (ll=0; ll<sbs; ll++){
5103         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5104         for (l=0; l<ncols; l++){
5105           *bufA++ = vals[l];
5106         }
5107         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5108       }
5109     }
5110     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5111   }
5112   /* recvs and sends of a-array are completed */
5113   i = nrecvs;
5114   while (i--) {
5115     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5116   }
5117   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5118   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5119 
5120   if (scall == MAT_INITIAL_MATRIX){
5121     /* put together the new matrix */
5122     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5123 
5124     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5125     /* Since these are PETSc arrays, change flags to free them as necessary. */
5126     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5127     b_oth->free_a  = PETSC_TRUE;
5128     b_oth->free_ij = PETSC_TRUE;
5129     b_oth->nonew   = 0;
5130 
5131     ierr = PetscFree(bufj);CHKERRQ(ierr);
5132     if (!startsj || !bufa_ptr){
5133       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5134       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5135     } else {
5136       *startsj   = sstartsj;
5137       *startsj_r = rstartsj;
5138       *bufa_ptr  = bufa;
5139     }
5140   }
5141   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5142   PetscFunctionReturn(0);
5143 }
5144 
5145 #undef __FUNCT__
5146 #define __FUNCT__ "MatGetCommunicationStructs"
5147 /*@C
5148   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5149 
5150   Not Collective
5151 
5152   Input Parameters:
5153 . A - The matrix in mpiaij format
5154 
5155   Output Parameter:
5156 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5157 . colmap - A map from global column index to local index into lvec
5158 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5159 
5160   Level: developer
5161 
5162 @*/
5163 #if defined (PETSC_USE_CTABLE)
5164 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5165 #else
5166 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5167 #endif
5168 {
5169   Mat_MPIAIJ *a;
5170 
5171   PetscFunctionBegin;
5172   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5173   PetscValidPointer(lvec, 2);
5174   PetscValidPointer(colmap, 3);
5175   PetscValidPointer(multScatter, 4);
5176   a = (Mat_MPIAIJ *) A->data;
5177   if (lvec) *lvec = a->lvec;
5178   if (colmap) *colmap = a->colmap;
5179   if (multScatter) *multScatter = a->Mvctx;
5180   PetscFunctionReturn(0);
5181 }
5182 
5183 EXTERN_C_BEGIN
5184 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5185 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5186 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5187 EXTERN_C_END
5188 
5189 #undef __FUNCT__
5190 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5191 /*
5192     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5193 
5194                n                       p                          p
5195         (              )       (              )         (                  )
5196       m (      A       )  *  n (       B      )   =   m (         C        )
5197         (              )       (              )         (                  )
5198 
5199 */
5200 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5201 {
5202   PetscErrorCode     ierr;
5203   Mat                At,Bt,Ct;
5204 
5205   PetscFunctionBegin;
5206   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5207   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5208   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5209   ierr = MatDestroy(At);CHKERRQ(ierr);
5210   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5211   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5212   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5213   PetscFunctionReturn(0);
5214 }
5215 
5216 #undef __FUNCT__
5217 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5218 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5219 {
5220   PetscErrorCode ierr;
5221   PetscInt       m=A->rmap->n,n=B->cmap->n;
5222   Mat            Cmat;
5223 
5224   PetscFunctionBegin;
5225   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5226   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5227   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5228   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5229   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5230   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5231   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5232   *C   = Cmat;
5233   PetscFunctionReturn(0);
5234 }
5235 
5236 /* ----------------------------------------------------------------*/
5237 #undef __FUNCT__
5238 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5239 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5240 {
5241   PetscErrorCode ierr;
5242 
5243   PetscFunctionBegin;
5244   if (scall == MAT_INITIAL_MATRIX){
5245     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5246   }
5247   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5248   PetscFunctionReturn(0);
5249 }
5250 
5251 EXTERN_C_BEGIN
5252 #if defined(PETSC_HAVE_MUMPS)
5253 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5254 #endif
5255 #if defined(PETSC_HAVE_PASTIX)
5256 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5257 #endif
5258 #if defined(PETSC_HAVE_SUPERLU_DIST)
5259 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5260 #endif
5261 #if defined(PETSC_HAVE_SPOOLES)
5262 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5263 #endif
5264 EXTERN_C_END
5265 
5266 /*MC
5267    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5268 
5269    Options Database Keys:
5270 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5271 
5272   Level: beginner
5273 
5274 .seealso: MatCreateMPIAIJ()
5275 M*/
5276 
5277 EXTERN_C_BEGIN
5278 #undef __FUNCT__
5279 #define __FUNCT__ "MatCreate_MPIAIJ"
5280 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5281 {
5282   Mat_MPIAIJ     *b;
5283   PetscErrorCode ierr;
5284   PetscMPIInt    size;
5285 
5286   PetscFunctionBegin;
5287   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5288 
5289   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5290   B->data         = (void*)b;
5291   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5292   B->rmap->bs     = 1;
5293   B->assembled    = PETSC_FALSE;
5294   B->mapping      = 0;
5295 
5296   B->insertmode   = NOT_SET_VALUES;
5297   b->size         = size;
5298   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5299 
5300   /* build cache for off array entries formed */
5301   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5302   b->donotstash  = PETSC_FALSE;
5303   b->colmap      = 0;
5304   b->garray      = 0;
5305   b->roworiented = PETSC_TRUE;
5306 
5307   /* stuff used for matrix vector multiply */
5308   b->lvec      = PETSC_NULL;
5309   b->Mvctx     = PETSC_NULL;
5310 
5311   /* stuff for MatGetRow() */
5312   b->rowindices   = 0;
5313   b->rowvalues    = 0;
5314   b->getrowactive = PETSC_FALSE;
5315 
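  /* compose, by string name, the external factorization interfaces and type-specific
     operations; other code retrieves them later with PetscObjectQueryFunction() */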
5316 #if defined(PETSC_HAVE_SPOOLES)
5317   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5318                                      "MatGetFactor_mpiaij_spooles",
5319                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5320 #endif
5321 #if defined(PETSC_HAVE_MUMPS)
5322   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5323                                      "MatGetFactor_aij_mumps",
5324                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5325 #endif
5326 #if defined(PETSC_HAVE_PASTIX)
5327   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5328 					   "MatGetFactor_mpiaij_pastix",
5329 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5330 #endif
5331 #if defined(PETSC_HAVE_SUPERLU_DIST)
5332   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5333                                      "MatGetFactor_mpiaij_superlu_dist",
5334                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5335 #endif
5336   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5337                                      "MatStoreValues_MPIAIJ",
5338                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5339   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5340                                      "MatRetrieveValues_MPIAIJ",
5341                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5342   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5343 				     "MatGetDiagonalBlock_MPIAIJ",
5344                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5345   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5346 				     "MatIsTranspose_MPIAIJ",
5347 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5348   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5349 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5350 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5351   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5352 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5353 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5354   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5355 				     "MatDiagonalScaleLocal_MPIAIJ",
5356 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5357   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5358                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5359                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5360   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5361                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5362                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5363   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5364                                      "MatConvert_MPIAIJ_MPISBAIJ",
5365                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5366   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5367                                      "MatMatMult_MPIDense_MPIAIJ",
5368                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5369   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5370                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5371                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5372   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5373                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5374                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5375   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5376   PetscFunctionReturn(0);
5377 }
5378 EXTERN_C_END
5379 
5380 #undef __FUNCT__
5381 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5382 /*@
5383      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5384          and "off-diagonal" part of the matrix in CSR format.
5385 
5386    Collective on MPI_Comm
5387 
5388    Input Parameters:
5389 +  comm - MPI communicator
5390 .  m - number of local rows (cannot be PETSC_DECIDE)
5391 .  n - number of local columns; this should be the same as the local size used in creating the
5392        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5393        calculated if N is given). For square matrices n is almost always m.
5394 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5395 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5396 .  i - row indices for "diagonal" portion of matrix
5397 .  j - column indices for "diagonal" portion of matrix
5398 .  a - matrix values for "diagonal" portion of matrix
5399 .  oi - row indices for "off-diagonal" portion of matrix
5400 .  oj - column indices for "off-diagonal" portion of matrix
5401 -  oa - matrix values for "off-diagonal" portion of matrix
5402 
5403    Output Parameter:
5404 .  mat - the matrix
5405 
5406    Level: advanced
5407 
5408    Notes:
5409        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5410 
5411        The i, j, oi, and oj indices are 0-based
5412 
5413        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5414 
5415        This sets local rows and cannot be used to set off-processor values.
5416 
5417        You cannot later use MatSetValues() to change values in this matrix.
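
       Example usage (a minimal sketch, assuming the six CSR arrays i, j, a, oi, oj, and oa
       have already been filled for the local "diagonal" and "off-diagonal" blocks; A below
       is an illustrative variable name, not one from this source):
.vb
       Mat A;
       ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                             i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
       Because the arrays are not copied, they must remain valid for the lifetime of A.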
5418 
5419 .keywords: matrix, aij, compressed row, sparse, parallel
5420 
5421 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5422           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5423 @*/
5424 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5425 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5426 {
5427   PetscErrorCode ierr;
5428   Mat_MPIAIJ     *maij;
5429 
5430   PetscFunctionBegin;
5431   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5432   if (i[0]) {
5433     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5434   }
5435   if (oi[0]) {
5436     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5437   }
5438   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5439   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5440   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5441   maij = (Mat_MPIAIJ*) (*mat)->data;
5442   maij->donotstash     = PETSC_TRUE;
5443   (*mat)->preallocated = PETSC_TRUE;
5444 
5445   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5446   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5447   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5448   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5449 
5450   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5451   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5452 
5453   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5454   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5455   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5456   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5457 
5458   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5459   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5460   PetscFunctionReturn(0);
5461 }
5462 
5463 /*
5464     Special version for direct calls from Fortran
5465 */
5466 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5467 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5468 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5469 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5470 #endif
5471 
5472 /* Change these macros so they can be used in a void function; the standard versions return an error code, which a void Fortran-callable routine cannot do, so errors abort instead */
5473 #undef CHKERRQ
5474 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5475 #undef SETERRQ2
5476 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5477 #undef SETERRQ
5478 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5479 
5480 EXTERN_C_BEGIN
5481 #undef __FUNCT__
5482 #define __FUNCT__ "matsetvaluesmpiaij_"
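/*
   Fortran-callable equivalent of MatSetValues() for MPIAIJ matrices; it mirrors the logic of
   MatSetValues_MPIAIJ() but returns void, so (with the macros redefined above) any error aborts
   rather than returning an error code.
*/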
5483 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5484 {
5485   Mat             mat = *mmat;
5486   PetscInt        m = *mm, n = *mn;
5487   InsertMode      addv = *maddv;
5488   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5489   PetscScalar     value;
5490   PetscErrorCode  ierr;
5491 
5492   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5493   if (mat->insertmode == NOT_SET_VALUES) {
5494     mat->insertmode = addv;
5495   }
5496 #if defined(PETSC_USE_DEBUG)
5497   else if (mat->insertmode != addv) {
5498     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5499   }
5500 #endif
5501   {
5502   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5503   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5504   PetscBool       roworiented = aij->roworiented;
5505 
5506   /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5507   Mat             A = aij->A;
5508   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5509   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5510   MatScalar       *aa = a->a;
5511   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5512   Mat             B = aij->B;
5513   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5514   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5515   MatScalar       *ba = b->a;
5516 
5517   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5518   PetscInt        nonew = a->nonew;
5519   MatScalar       *ap1,*ap2;
5520 
5521   PetscFunctionBegin;
5522   for (i=0; i<m; i++) {
5523     if (im[i] < 0) continue;
5524 #if defined(PETSC_USE_DEBUG)
5525     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5526 #endif
5527     if (im[i] >= rstart && im[i] < rend) {
5528       row      = im[i] - rstart;
5529       lastcol1 = -1;
5530       rp1      = aj + ai[row];
5531       ap1      = aa + ai[row];
5532       rmax1    = aimax[row];
5533       nrow1    = ailen[row];
5534       low1     = 0;
5535       high1    = nrow1;
5536       lastcol2 = -1;
5537       rp2      = bj + bi[row];
5538       ap2      = ba + bi[row];
5539       rmax2    = bimax[row];
5540       nrow2    = bilen[row];
5541       low2     = 0;
5542       high2    = nrow2;
5543 
5544       for (j=0; j<n; j++) {
5545         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5546         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5547         if (in[j] >= cstart && in[j] < cend){
5548           col = in[j] - cstart;
5549           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5550         } else if (in[j] < 0) continue;
5551 #if defined(PETSC_USE_DEBUG)
5552         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5553 #endif
5554         else {
5555           if (mat->was_assembled) {
5556             if (!aij->colmap) {
5557               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5558             }
5559 #if defined (PETSC_USE_CTABLE)
5560             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5561 	    col--;
5562 #else
5563             col = aij->colmap[in[j]] - 1;
5564 #endif
5565             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5566               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5567               col =  in[j];
5568               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5569               B = aij->B;
5570               b = (Mat_SeqAIJ*)B->data;
5571               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5572               rp2      = bj + bi[row];
5573               ap2      = ba + bi[row];
5574               rmax2    = bimax[row];
5575               nrow2    = bilen[row];
5576               low2     = 0;
5577               high2    = nrow2;
5578               bm       = aij->B->rmap->n;
5579               ba = b->a;
5580             }
5581           } else col = in[j];
5582           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5583         }
5584       }
5585     } else {
5586       if (!aij->donotstash) {
5587         if (roworiented) {
5588           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5589         } else {
5590           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5591         }
5592       }
5593     }
5594   }}
5595   PetscFunctionReturnVoid();
5596 }
5597 EXTERN_C_END
5598 
5599