/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2eff7a5159698db79bc55cdeeefa9ddc181a02b7) */
#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the diagonal and off-diagonal nonzero counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the diagonal and off-diagonal nonzero counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
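
/*
   A minimal usage sketch (illustrative only; the variable names are hypothetical).
   Rank 0 holds the entire matrix as a SeqAIJ; every rank passes its local row
   count m and receives its slice of the parallel matrix:

       Mat gseq = 0,gmpi;        (gseq assembled on rank 0 only)
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_INITIAL_MATRIX,&gmpi);CHKERRQ(ierr);
       ... later, after changing numerical values (same nonzero pattern) on rank 0 ...
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_REUSE_MATRIX,&gmpi);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX path ships only the numerical values, relying on the
   ld[] array stashed in Mat_MPIAIJ by the MAT_INITIAL_MATRIX call.
*/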

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each processor
has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
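
/*
   A tiny worked example of the mapping (hypothetical numbers): if the
   off-diagonal block B of this process has columns with global indices
   garray[] = {3, 7, 12}, then

     PETSC_USE_CTABLE path:  the table holds 4->1, 8->2, 13->3
     array path:             colmap[3] = 1, colmap[7] = 2, colmap[12] = 3, rest 0

   Keys and stored values are shifted by one so that 0 can mean "not found";
   callers look up in[j] (+1 for the table variant) and subtract one, so a
   result of -1 flags a global column not (yet) present in B.
*/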
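/*
   The two macros below insert a single value into the diagonal (A) or
   off-diagonal (B) SeqAIJ block. Each binary-searches the current row for the
   column (narrowing [low,high) until it is short enough to scan linearly),
   adds to or overwrites an existing entry in place, and otherwise shifts the
   tail of the row up by one to make room, reallocating via
   MatSeqXAIJReallocateAIJ() when the row is full.
*/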
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
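
/*
   Sketch of the v[] layout the three copies above assume (not part of the
   original source): v holds the full row in ascending global column order,
   so it splits as

       [ B entries left of the diagonal block | diagonal block (A) entries | B entries right of it ]
         l values                               a->i[row+1]-a->i[row]        b->i[row+1]-b->i[row]-l

   This relies on the off-diagonal columns of B being stored in ascending
   global order, as arranged by MatSetUpMultiply_MPIAIJ().
*/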

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required by the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscBool      ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
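
/*
   Layout reminder (not part of the original source): with row-oriented input
   the j-th column entry of the i-th supplied row is v[i*n+j]; with
   column-oriented input it is v[i+j*m]. Entries whose rows are locally owned
   go straight into the A/B blocks above; everything else lands in the stash
   and travels during MatAssemblyBegin/End.
*/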

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}
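
/*
   Assembly flow in brief: values stashed for off-process rows by
   MatSetValues_MPIAIJ() are scattered to their owning processes in
   MatAssemblyBegin_MPIAIJ(), drained row-by-row at the top of
   MatAssemblyEnd_MPIAIJ(), and only then are the local A and B blocks
   assembled and the ghost-column scatter (re)built by
   MatSetUpMultiply_MPIAIJ().
*/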

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     avoiding lots of mallocs that would slow things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
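
/*
   A minimal usage sketch for the row-zeroing path above (hypothetical names):
   zero two global rows of a parallel matrix, put 1.0 on their diagonal, and
   fix the right-hand side so those rows enforce u = x at those points:

       PetscInt rows[] = {0, 5};
       ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);

   Each process passes only the rows it wants zeroed (they need not be locally
   owned); the send/receive machinery above routes them to their owners.
*/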

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(xmask);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecGetArray(b,&bb);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing the removed columns */
  if (aij->compressedrow.use){
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++){
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[*ridx] -= *aa*xx[*aj];
          *aa        = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[i] -= *aa*xx[*aj];
          *aa    = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }

  ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecDestroy(lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
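
/*
   In matrix terms the product above is y = A_d x_local + B x_ghost, where A_d
   is the diagonal block and B the off-diagonal block: the scatter of the
   ghost values into a->lvec is started before the local multiply and
   completed after it, so communication overlaps the local computation.
   MatMultAdd_MPIAIJ() below follows the same pattern.
*/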

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we still need to call this no-op */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr); /* was leaked: ISCreateGeneral() copied the indices above */

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
  if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar       *column_values;
  PetscInt          message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
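
/*
   For reference, the binary file written above follows the standard PETSc Mat
   format: a four-entry integer header {MAT_FILE_CLASSID, M, N, total nnz},
   then all row lengths, then all column indices (global, ascending within
   each row), then all numerical values in the same order. Process 0 writes
   its own data first and then receives and writes every other process's
   slice in rank order, throttled by the flow-control helpers.
*/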
1273 
1274 #undef __FUNCT__
1275 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1276 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1277 {
1278   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1279   PetscErrorCode    ierr;
1280   PetscMPIInt       rank = aij->rank,size = aij->size;
1281   PetscBool         isdraw,iascii,isbinary;
1282   PetscViewer       sviewer;
1283   PetscViewerFormat format;
1284 
1285   PetscFunctionBegin;
1286   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1287   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1288   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1289   if (iascii) {
1290     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1291     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1292       MatInfo    info;
1293       PetscBool  inodes;
1294 
1295       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1296       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1297       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1298       if (!inodes) {
1299         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1300                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1301       } else {
1302         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1303                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1304       }
1305       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1306       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1307       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1308       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1309       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1310       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1311       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1312       PetscFunctionReturn(0);
1313     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1314       PetscInt   inodecount,inodelimit,*inodes;
1315       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1316       if (inodes) {
1317         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1318       } else {
1319         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1320       }
1321       PetscFunctionReturn(0);
1322     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1323       PetscFunctionReturn(0);
1324     }
1325   } else if (isbinary) {
1326     if (size == 1) {
1327       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1328       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1329     } else {
1330       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1331     }
1332     PetscFunctionReturn(0);
1333   } else if (isdraw) {
1334     PetscDraw  draw;
1335     PetscBool  isnull;
1336     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1337     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1338   }
1339 
1340   if (size == 1) {
1341     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1342     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1343   } else {
1344     /* assemble the entire matrix onto first processor. */
1345     Mat         A;
1346     Mat_SeqAIJ  *Aloc;
1347     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1348     MatScalar   *a;
1349 
1350     if (mat->rmap->N > 1024) {
1351       PetscBool  flg = PETSC_FALSE;
1352 
1353       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1354       if (!flg) {
1355         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1356       }
1357     }
1358 
1359     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1360     if (!rank) {
1361       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1362     } else {
1363       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1364     }
1365     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1366     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1367     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1368     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1369 
1370     /* copy over the A part */
1371     Aloc = (Mat_SeqAIJ*)aij->A->data;
1372     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1373     row = mat->rmap->rstart;
1374     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1375     for (i=0; i<m; i++) {
1376       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1377       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1378     }
1379     aj = Aloc->j;
1380     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1381 
1382     /* copy over the B part */
1383     Aloc = (Mat_SeqAIJ*)aij->B->data;
1384     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1385     row  = mat->rmap->rstart;
1386     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1387     ct   = cols;
1388     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1389     for (i=0; i<m; i++) {
1390       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1391       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1392     }
1393     ierr = PetscFree(ct);CHKERRQ(ierr);
1394     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1395     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1396     /*
1397        Everyone has to call to draw the matrix since the graphics waits are
1398        synchronized across all processors that share the PetscDraw object
1399     */
1400     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1401     if (!rank) {
1402       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1403       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1404       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1405       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1406     }
1407     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1408     ierr = MatDestroy(A);CHKERRQ(ierr);
1409   }
1410   PetscFunctionReturn(0);
1411 }
1412 
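/*
   A minimal usage sketch for the viewer dispatch above (the calls are
   standard PETSc API; the variable names are illustrative only):

     Mat         A;                                   an assembled MPIAIJ matrix
     PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);

   PETSC_VIEWER_ASCII_INFO prints the I-node summary; with
   PETSC_VIEWER_ASCII_INFO_DETAIL the per-rank nonzero counts and the
   VecScatter used for matrix-vector products are printed as well.
*/
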
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatView_MPIAIJ"
1415 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1416 {
1417   PetscErrorCode ierr;
1418   PetscBool      iascii,isdraw,issocket,isbinary;
1419 
1420   PetscFunctionBegin;
1421   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1422   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1423   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1424   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1425   if (iascii || isdraw || isbinary || issocket) {
1426     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1427   } else {
1428     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1429   }
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatSOR_MPIAIJ"
1435 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1436 {
1437   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1438   PetscErrorCode ierr;
1439   Vec            bb1 = 0;
1440   PetscBool      hasop;
1441 
1442   PetscFunctionBegin;
1443   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1444     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1445   }
1446 
1447   if (flag == SOR_APPLY_UPPER) {
1448     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1449     PetscFunctionReturn(0);
1450   }
1451 
1452   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1453     if (flag & SOR_ZERO_INITIAL_GUESS) {
1454       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1455       its--;
1456     }
1457 
1458     while (its--) {
1459       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1460       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1461 
1462       /* update rhs: bb1 = bb - B*x */
1463       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1464       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1465 
1466       /* local sweep */
1467       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1468     }
1469   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1470     if (flag & SOR_ZERO_INITIAL_GUESS) {
1471       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1472       its--;
1473     }
1474     while (its--) {
1475       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1476       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1477 
1478       /* update rhs: bb1 = bb - B*x */
1479       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1480       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1481 
1482       /* local sweep */
1483       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1484     }
1485   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1486     if (flag & SOR_ZERO_INITIAL_GUESS) {
1487       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1488       its--;
1489     }
1490     while (its--) {
1491       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1492       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1493 
1494       /* update rhs: bb1 = bb - B*x */
1495       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1496       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1497 
1498       /* local sweep */
1499       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1500     }
1501   }  else if (flag & SOR_EISENSTAT) {
1502     Vec         xx1;
1503 
1504     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1505     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1506 
1507     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1508     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1509     if (!mat->diag) {
1510       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1511       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1512     }
1513     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1514     if (hasop) {
1515       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1516     } else {
1517       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1518     }
1519     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1520 
1521     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1522 
1523     /* local sweep */
1524     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1525     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1526     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1527   } else {
1528     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1529   }
1530 
1531   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1532   PetscFunctionReturn(0);
1533 }
1534 
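/*
   A minimal sketch of exercising the SOR kernel above through KSP/PC
   (standard PETSc calls; ksp, pc, A, b, x are illustrative names). On more
   than one process PCSOR requests the local sweeps handled above:

     ierr = KSPCreate(((PetscObject)A)->comm,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = KSPSetType(ksp,KSPRICHARDSON);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
*/
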
1535 #undef __FUNCT__
1536 #define __FUNCT__ "MatPermute_MPIAIJ"
1537 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1538 {
1539   MPI_Comm       comm,pcomm;
1540   PetscInt       first,local_size,nrows;
1541   const PetscInt *rows;
1542   PetscMPIInt    size;
1543   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1544   PetscErrorCode ierr;
1545 
1546   PetscFunctionBegin;
1547   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1548   /* make a collective version of 'rowp' */
1549   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1550   if (pcomm==comm) {
1551     crowp = rowp;
1552   } else {
1553     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1554     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1555     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1556     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1557   }
1558   /* collect the global row permutation and invert it */
1559   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1560   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1561   if (pcomm!=comm) {
1562     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1563   }
1564   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1565   /* get the local target indices */
1566   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1567   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1568   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1569   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1570   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1571   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1572   /* the column permutation is simpler:
1573      make a local version of 'colp' and invert it */
1574   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1575   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1576   if (size==1) {
1577     lcolp = colp;
1578   } else {
1579     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1580     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1581     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1582   }
1583   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1584   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1585   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1586   if (size>1) {
1587     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1588     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1589   }
1590   /* now we just get the submatrix */
1591   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1592   /* clean up */
1593   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1594   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1595   PetscFunctionReturn(0);
1596 }
1597 
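/*
   A minimal usage sketch for the permutation above (rowp and colp must be
   permutations; nlocal, rowidx and colidx are illustrative names):

     IS  rowp,colp;
     Mat B;
     ierr = ISCreateGeneral(comm,nlocal,rowidx,PETSC_COPY_VALUES,&rowp);CHKERRQ(ierr);
     ierr = ISCreateGeneral(comm,nlocal,colidx,PETSC_COPY_VALUES,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/
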
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1600 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1601 {
1602   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1603   Mat            A = mat->A,B = mat->B;
1604   PetscErrorCode ierr;
1605   PetscReal      isend[5],irecv[5];
1606 
1607   PetscFunctionBegin;
1608   info->block_size     = 1.0;
1609   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1610   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1611   isend[3] = info->memory;  isend[4] = info->mallocs;
1612   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1613   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1614   isend[3] += info->memory;  isend[4] += info->mallocs;
1615   if (flag == MAT_LOCAL) {
1616     info->nz_used      = isend[0];
1617     info->nz_allocated = isend[1];
1618     info->nz_unneeded  = isend[2];
1619     info->memory       = isend[3];
1620     info->mallocs      = isend[4];
1621   } else if (flag == MAT_GLOBAL_MAX) {
1622     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1623     info->nz_used      = irecv[0];
1624     info->nz_allocated = irecv[1];
1625     info->nz_unneeded  = irecv[2];
1626     info->memory       = irecv[3];
1627     info->mallocs      = irecv[4];
1628   } else if (flag == MAT_GLOBAL_SUM) {
1629     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1630     info->nz_used      = irecv[0];
1631     info->nz_allocated = irecv[1];
1632     info->nz_unneeded  = irecv[2];
1633     info->memory       = irecv[3];
1634     info->mallocs      = irecv[4];
1635   }
1636   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1637   info->fill_ratio_needed = 0;
1638   info->factor_mallocs    = 0;
1639 
1640   PetscFunctionReturn(0);
1641 }
1642 
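/*
   A minimal usage sketch for the info reduction above (illustrative names).
   MAT_LOCAL returns this rank's counts, MAT_GLOBAL_MAX reduces them with
   MPI_MAX and MAT_GLOBAL_SUM with MPI_SUM:

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %D, allocated %D\n",
                        (PetscInt)info.nz_used,(PetscInt)info.nz_allocated);CHKERRQ(ierr);
*/
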
1643 #undef __FUNCT__
1644 #define __FUNCT__ "MatSetOption_MPIAIJ"
1645 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1646 {
1647   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1648   PetscErrorCode ierr;
1649 
1650   PetscFunctionBegin;
1651   switch (op) {
1652   case MAT_NEW_NONZERO_LOCATIONS:
1653   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1654   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1655   case MAT_KEEP_NONZERO_PATTERN:
1656   case MAT_NEW_NONZERO_LOCATION_ERR:
1657   case MAT_USE_INODES:
1658   case MAT_IGNORE_ZERO_ENTRIES:
1659     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1660     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1661     break;
1662   case MAT_ROW_ORIENTED:
1663     a->roworiented = flg;
1664     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1665     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1666     break;
1667   case MAT_NEW_DIAGONALS:
1668     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1669     break;
1670   case MAT_IGNORE_OFF_PROC_ENTRIES:
1671     a->donotstash = PETSC_TRUE;
1672     break;
1673   case MAT_SPD:
1674     A->spd_set                         = PETSC_TRUE;
1675     A->spd                             = flg;
1676     if (flg) {
1677       A->symmetric                     = PETSC_TRUE;
1678       A->structurally_symmetric        = PETSC_TRUE;
1679       A->symmetric_set                 = PETSC_TRUE;
1680       A->structurally_symmetric_set    = PETSC_TRUE;
1681     }
1682     break;
1683   case MAT_SYMMETRIC:
1684     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1685     break;
1686   case MAT_STRUCTURALLY_SYMMETRIC:
1687     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1688     break;
1689   case MAT_HERMITIAN:
1690     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1691     break;
1692   case MAT_SYMMETRY_ETERNAL:
1693     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1694     break;
1695   default:
1696     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1697   }
1698   PetscFunctionReturn(0);
1699 }
1700 
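/*
   A minimal usage sketch (A is an illustrative MPIAIJ matrix). Structural
   options are forwarded to both local blocks; symmetry-type options only
   make sense on the square diagonal block a->A:

     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/
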
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatGetRow_MPIAIJ"
1703 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1704 {
1705   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1706   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1707   PetscErrorCode ierr;
1708   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1709   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1710   PetscInt       *cmap,*idx_p;
1711 
1712   PetscFunctionBegin;
1713   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1714   mat->getrowactive = PETSC_TRUE;
1715 
1716   if (!mat->rowvalues && (idx || v)) {
1717     /*
1718         allocate enough space to hold information from the longest row.
1719     */
1720     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1721     PetscInt   max = 1,tmp;
1722     for (i=0; i<matin->rmap->n; i++) {
1723       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1724       if (max < tmp) { max = tmp; }
1725     }
1726     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1727   }
1728 
1729   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1730   lrow = row - rstart;
1731 
1732   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1733   if (!v)   {pvA = 0; pvB = 0;}
1734   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1735   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1736   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1737   nztot = nzA + nzB;
1738 
1739   cmap  = mat->garray;
1740   if (v  || idx) {
1741     if (nztot) {
1742       /* Sort by increasing column numbers, assuming A and B already sorted */
1743       PetscInt imark = -1;
1744       if (v) {
1745         *v = v_p = mat->rowvalues;
1746         for (i=0; i<nzB; i++) {
1747           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1748           else break;
1749         }
1750         imark = i;
1751         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1752         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1753       }
1754       if (idx) {
1755         *idx = idx_p = mat->rowindices;
1756         if (imark > -1) {
1757           for (i=0; i<imark; i++) {
1758             idx_p[i] = cmap[cworkB[i]];
1759           }
1760         } else {
1761           for (i=0; i<nzB; i++) {
1762             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1763             else break;
1764           }
1765           imark = i;
1766         }
1767         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1768         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1769       }
1770     } else {
1771       if (idx) *idx = 0;
1772       if (v)   *v   = 0;
1773     }
1774   }
1775   *nz = nztot;
1776   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1777   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1778   PetscFunctionReturn(0);
1779 }
1780 
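/*
   A minimal sketch of the intended calling pattern for the two routines
   above (illustrative loop; only locally owned rows may be requested, and
   each MatGetRow() must be matched by a MatRestoreRow() before the next
   row is fetched):

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
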
1781 #undef __FUNCT__
1782 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1783 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1784 {
1785   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1786 
1787   PetscFunctionBegin;
1788   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1789   aij->getrowactive = PETSC_FALSE;
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 #undef __FUNCT__
1794 #define __FUNCT__ "MatNorm_MPIAIJ"
1795 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1796 {
1797   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1798   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1799   PetscErrorCode ierr;
1800   PetscInt       i,j,cstart = mat->cmap->rstart;
1801   PetscReal      sum = 0.0;
1802   MatScalar      *v;
1803 
1804   PetscFunctionBegin;
1805   if (aij->size == 1) {
1806     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1807   } else {
1808     if (type == NORM_FROBENIUS) {
1809       v = amat->a;
1810       for (i=0; i<amat->nz; i++) {
1811 #if defined(PETSC_USE_COMPLEX)
1812         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1813 #else
1814         sum += (*v)*(*v); v++;
1815 #endif
1816       }
1817       v = bmat->a;
1818       for (i=0; i<bmat->nz; i++) {
1819 #if defined(PETSC_USE_COMPLEX)
1820         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1821 #else
1822         sum += (*v)*(*v); v++;
1823 #endif
1824       }
1825       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1826       *norm = sqrt(*norm);
1827     } else if (type == NORM_1) { /* max column norm */
1828       PetscReal *tmp,*tmp2;
1829       PetscInt  *jj,*garray = aij->garray;
1830       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1831       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1832       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1833       *norm = 0.0;
1834       v = amat->a; jj = amat->j;
1835       for (j=0; j<amat->nz; j++) {
1836         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1837       }
1838       v = bmat->a; jj = bmat->j;
1839       for (j=0; j<bmat->nz; j++) {
1840         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1841       }
1842       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1843       for (j=0; j<mat->cmap->N; j++) {
1844         if (tmp2[j] > *norm) *norm = tmp2[j];
1845       }
1846       ierr = PetscFree(tmp);CHKERRQ(ierr);
1847       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1848     } else if (type == NORM_INFINITY) { /* max row norm */
1849       PetscReal ntemp = 0.0;
1850       for (j=0; j<aij->A->rmap->n; j++) {
1851         v = amat->a + amat->i[j];
1852         sum = 0.0;
1853         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1854           sum += PetscAbsScalar(*v); v++;
1855         }
1856         v = bmat->a + bmat->i[j];
1857         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1858           sum += PetscAbsScalar(*v); v++;
1859         }
1860         if (sum > ntemp) ntemp = sum;
1861       }
1862       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1863     } else {
1864       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1865     }
1866   }
1867   PetscFunctionReturn(0);
1868 }
1869 
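/*
   The norms computed above, for an m x n matrix with entries a_ij:

     NORM_FROBENIUS:  ||A||_F   = sqrt(sum_ij |a_ij|^2)    (MPI_SUM reduction)
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|       (max column sum)
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|       (max row sum)

   Each rank accumulates over its diagonal (A) and off-diagonal (B) blocks,
   then a single MPI_Allreduce() combines the partial results.
*/
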
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatTranspose_MPIAIJ"
1872 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1873 {
1874   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1875   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1876   PetscErrorCode ierr;
1877   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1878   PetscInt       cstart=A->cmap->rstart,ncol;
1879   Mat            B;
1880   MatScalar      *array;
1881 
1882   PetscFunctionBegin;
1883   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1884 
1885   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1886   ai = Aloc->i; aj = Aloc->j;
1887   bi = Bloc->i; bj = Bloc->j;
1888   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1889     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1890     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1891     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1892     for (i=0; i<ai[ma]; i++){
1893       d_nnz[aj[i]] ++;
1894       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1895     }
1896 
1897     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1898     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1899     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1900     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1901     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1902   } else {
1903     B = *matout;
1904   }
1905 
1906   /* copy over the A part */
1907   array = Aloc->a;
1908   row = A->rmap->rstart;
1909   for (i=0; i<ma; i++) {
1910     ncol = ai[i+1]-ai[i];
1911     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1912     row++; array += ncol; aj += ncol;
1913   }
1914   aj = Aloc->j;
1915   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1916 
1917   /* copy over the B part */
1918   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1919   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1920   array = Bloc->a;
1921   row = A->rmap->rstart;
1922   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1923   cols_tmp = cols;
1924   for (i=0; i<mb; i++) {
1925     ncol = bi[i+1]-bi[i];
1926     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1927     row++; array += ncol; cols_tmp += ncol;
1928   }
1929   ierr = PetscFree(cols);CHKERRQ(ierr);
1930 
1931   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1932   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1933   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1934     *matout = B;
1935   } else {
1936     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1937   }
1938   PetscFunctionReturn(0);
1939 }
1940 
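/*
   A minimal usage sketch (At is an illustrative name). In-place transpose,
   i.e. MAT_REUSE_MATRIX with *matout == A, is only permitted for square
   matrices, as checked above:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(At);CHKERRQ(ierr);
*/
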
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1943 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1944 {
1945   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1946   Mat            a = aij->A,b = aij->B;
1947   PetscErrorCode ierr;
1948   PetscInt       s1,s2,s3;
1949 
1950   PetscFunctionBegin;
1951   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1952   if (rr) {
1953     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1954     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1955     /* Overlap communication with computation. */
1956     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1957   }
1958   if (ll) {
1959     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1960     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1961     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1962   }
1963   /* scale  the diagonal block */
1964   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1965 
1966   if (rr) {
1967     /* Do a scatter end and then right scale the off-diagonal block */
1968     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1969     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1970   }
1971 
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 #undef __FUNCT__
1976 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1977 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1978 {
1979   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1980   PetscErrorCode ierr;
1981 
1982   PetscFunctionBegin;
1983   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1984   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1985   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1986   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1987   PetscFunctionReturn(0);
1988 }
1989 #undef __FUNCT__
1990 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1991 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1992 {
1993   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1994   PetscErrorCode ierr;
1995 
1996   PetscFunctionBegin;
1997   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 #undef __FUNCT__
2002 #define __FUNCT__ "MatEqual_MPIAIJ"
2003 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2004 {
2005   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2006   Mat            a,b,c,d;
2007   PetscBool      flg;
2008   PetscErrorCode ierr;
2009 
2010   PetscFunctionBegin;
2011   a = matA->A; b = matA->B;
2012   c = matB->A; d = matB->B;
2013 
2014   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2015   if (flg) {
2016     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2017   }
2018   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatCopy_MPIAIJ"
2024 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2025 {
2026   PetscErrorCode ierr;
2027   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2028   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2029 
2030   PetscFunctionBegin;
2031   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2032   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2033     /* because of the column compression in the off-processor part of the matrix a->B,
2034        the number of columns in a->B and b->B may be different, hence we cannot call
2035        the MatCopy() directly on the two parts. If need be, we can provide a more
2036        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2037        then copying the submatrices */
2038     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2039   } else {
2040     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2041     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2042   }
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2048 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2049 {
2050   PetscErrorCode ierr;
2051 
2052   PetscFunctionBegin;
2053   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2054   PetscFunctionReturn(0);
2055 }
2056 
2057 #undef __FUNCT__
2058 #define __FUNCT__ "MatAXPY_MPIAIJ"
2059 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2060 {
2061   PetscErrorCode ierr;
2062   PetscInt       i;
2063   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2064   PetscBLASInt   bnz,one=1;
2065   Mat_SeqAIJ     *x,*y;
2066 
2067   PetscFunctionBegin;
2068   if (str == SAME_NONZERO_PATTERN) {
2069     PetscScalar alpha = a;
2070     x = (Mat_SeqAIJ *)xx->A->data;
2071     y = (Mat_SeqAIJ *)yy->A->data;
2072     bnz = PetscBLASIntCast(x->nz);
2073     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2074     x = (Mat_SeqAIJ *)xx->B->data;
2075     y = (Mat_SeqAIJ *)yy->B->data;
2076     bnz = PetscBLASIntCast(x->nz);
2077     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2078   } else if (str == SUBSET_NONZERO_PATTERN) {
2079     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2080 
2081     x = (Mat_SeqAIJ *)xx->B->data;
2082     y = (Mat_SeqAIJ *)yy->B->data;
2083     if (y->xtoy && y->XtoY != xx->B) {
2084       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2085       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2086     }
2087     if (!y->xtoy) { /* get xtoy */
2088       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2089       y->XtoY = xx->B;
2090       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2091     }
2092     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2093   } else {
2094     Mat B;
2095     PetscInt *nnz_d,*nnz_o;
2096     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2097     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2098     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2099     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2100     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2101     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2102     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2103     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2104     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2105     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2106     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2107     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2108     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2109   }
2110   PetscFunctionReturn(0);
2111 }
2112 
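/*
   A minimal usage sketch for Y = Y + a*X (illustrative names). The
   MatStructure argument selects the branch above: SAME_NONZERO_PATTERN is a
   straight BLAS axpy on the stored values, SUBSET_NONZERO_PATTERN builds and
   caches an x-to-y index map, and any other flag reallocates Y:

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/
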
2113 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2114 
2115 #undef __FUNCT__
2116 #define __FUNCT__ "MatConjugate_MPIAIJ"
2117 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2118 {
2119 #if defined(PETSC_USE_COMPLEX)
2120   PetscErrorCode ierr;
2121   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2122 
2123   PetscFunctionBegin;
2124   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2125   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2126 #else
2127   PetscFunctionBegin;
2128 #endif
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 #undef __FUNCT__
2133 #define __FUNCT__ "MatRealPart_MPIAIJ"
2134 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2135 {
2136   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2137   PetscErrorCode ierr;
2138 
2139   PetscFunctionBegin;
2140   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2141   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 #undef __FUNCT__
2146 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2147 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2148 {
2149   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2150   PetscErrorCode ierr;
2151 
2152   PetscFunctionBegin;
2153   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2154   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 #ifdef PETSC_HAVE_PBGL
2159 
2160 #include <boost/parallel/mpi/bsp_process_group.hpp>
2161 #include <boost/graph/distributed/ilu_default_graph.hpp>
2162 #include <boost/graph/distributed/ilu_0_block.hpp>
2163 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2164 #include <boost/graph/distributed/petsc/interface.hpp>
2165 #include <boost/multi_array.hpp>
2166 #include <boost/parallel/distributed_property_map.hpp>
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2170 /*
2171   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2172 */
2173 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2174 {
2175   namespace petsc = boost::distributed::petsc;
2176 
2177   namespace graph_dist = boost::graph::distributed;
2178   using boost::graph::distributed::ilu_default::process_group_type;
2179   using boost::graph::ilu_permuted;
2180 
2181   PetscBool       row_identity, col_identity;
2182   PetscContainer  c;
2183   PetscInt        m, n, M, N;
2184   PetscErrorCode  ierr;
2185 
2186   PetscFunctionBegin;
2187   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2188   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2189   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2190   if (!row_identity || !col_identity) {
2191     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2192   }
2193 
2194   process_group_type pg;
2195   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2196   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2197   lgraph_type&   level_graph = *lgraph_p;
2198   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2199 
2200   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2201   ilu_permuted(level_graph);
2202 
2203   /* put together the new matrix */
2204   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2205   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2206   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2207   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2208   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2209   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2210   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2211 
2212   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2213   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2214   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 #undef __FUNCT__
2219 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2220 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2221 {
2222   PetscFunctionBegin;
2223   PetscFunctionReturn(0);
2224 }
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatSolve_MPIAIJ"
2228 /*
2229   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2230 */
2231 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2232 {
2233   namespace graph_dist = boost::graph::distributed;
2234 
2235   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2236   lgraph_type*   lgraph_p;
2237   PetscContainer c;
2238   PetscErrorCode ierr;
2239 
2240   PetscFunctionBegin;
2241   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2242   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2243   ierr = VecCopy(b, x);CHKERRQ(ierr);
2244 
2245   PetscScalar* array_x;
2246   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2247   PetscInt sx;
2248   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2249 
2250   PetscScalar* array_b;
2251   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2252   PetscInt sb;
2253   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2254 
2255   lgraph_type&   level_graph = *lgraph_p;
2256   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2257 
2258   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2259   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2260                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2261 
2262   typedef boost::iterator_property_map<array_ref_type::iterator,
2263                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2264   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2265                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2266 
2267   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2268 
2269   PetscFunctionReturn(0);
2270 }
2271 #endif
2272 
2273 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2274   PetscInt       nzlocal,nsends,nrecvs;
2275   PetscMPIInt    *send_rank,*recv_rank;
2276   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2277   PetscScalar    *sbuf_a,**rbuf_a;
2278   PetscErrorCode (*MatDestroy)(Mat);
2279 } Mat_Redundant;
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2283 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2284 {
2285   PetscErrorCode       ierr;
2286   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2287   PetscInt             i;
2288 
2289   PetscFunctionBegin;
2290   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2291   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2292   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2293   for (i=0; i<redund->nrecvs; i++){
2294     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2295     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2296   }
2297   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2298   ierr = PetscFree(redund);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "MatDestroy_MatRedundant"
2304 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2305 {
2306   PetscErrorCode  ierr;
2307   PetscContainer  container;
2308   Mat_Redundant   *redund=PETSC_NULL;
2309 
2310   PetscFunctionBegin;
2311   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2312   if (container) {
2313     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2314   } else {
2315     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2316   }
2317   A->ops->destroy = redund->MatDestroy;
2318   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2319   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2320   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 #undef __FUNCT__
2325 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2326 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2327 {
2328   PetscMPIInt    rank,size;
2329   MPI_Comm       comm=((PetscObject)mat)->comm;
2330   PetscErrorCode ierr;
2331   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2332   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2333   PetscInt       *rowrange=mat->rmap->range;
2334   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2335   Mat            A=aij->A,B=aij->B,C=*matredundant;
2336   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2337   PetscScalar    *sbuf_a;
2338   PetscInt       nzlocal=a->nz+b->nz;
2339   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2340   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2341   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2342   MatScalar      *aworkA,*aworkB;
2343   PetscScalar    *vals;
2344   PetscMPIInt    tag1,tag2,tag3,imdex;
2345   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2346                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2347   MPI_Status     recv_status,*send_status;
2348   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2349   PetscInt       **rbuf_j=PETSC_NULL;
2350   PetscScalar    **rbuf_a=PETSC_NULL;
2351   Mat_Redundant  *redund=PETSC_NULL;
2352   PetscContainer container;
2353 
2354   PetscFunctionBegin;
2355   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2356   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2357 
2358   if (reuse == MAT_REUSE_MATRIX) {
2359     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2360     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2361     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2362     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2363     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2364     if (container) {
2365       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2366     } else {
2367       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2368     }
2369     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2370 
2371     nsends    = redund->nsends;
2372     nrecvs    = redund->nrecvs;
2373     send_rank = redund->send_rank;
2374     recv_rank = redund->recv_rank;
2375     sbuf_nz   = redund->sbuf_nz;
2376     rbuf_nz   = redund->rbuf_nz;
2377     sbuf_j    = redund->sbuf_j;
2378     sbuf_a    = redund->sbuf_a;
2379     rbuf_j    = redund->rbuf_j;
2380     rbuf_a    = redund->rbuf_a;
2381   }
2382 
2383   if (reuse == MAT_INITIAL_MATRIX){
2384     PetscMPIInt  subrank,subsize;
2385     PetscInt     nleftover,np_subcomm;
2386     /* get the destination processors' id send_rank, nsends and nrecvs */
2387     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2388     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2389     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2390     np_subcomm = size/nsubcomm;
2391     nleftover  = size - nsubcomm*np_subcomm;
2392     nsends = 0; nrecvs = 0;
2393     for (i=0; i<size; i++){ /* i=rank*/
2394       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2395         send_rank[nsends] = i; nsends++;
2396         recv_rank[nrecvs++] = i;
2397       }
2398     }
2399     if (rank >= size - nleftover){/* this proc is a leftover processor */
2400       i = size-nleftover-1;
2401       j = 0;
2402       while (j < nsubcomm - nleftover){
2403         send_rank[nsends++] = i;
2404         i--; j++;
2405       }
2406     }
2407 
2408     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2409       for (i=0; i<nleftover; i++){
2410         recv_rank[nrecvs++] = size-nleftover+i;
2411       }
2412     }
2413 
2414     /* allocate sbuf_j, sbuf_a */
2415     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2416     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2417     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2418   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2419 
2420   /* copy mat's local entries into the buffers */
2421   if (reuse == MAT_INITIAL_MATRIX){
2422     rownz_max = 0;
2423     rptr = sbuf_j;
2424     cols = sbuf_j + rend-rstart + 1;
2425     vals = sbuf_a;
2426     rptr[0] = 0;
2427     for (i=0; i<rend-rstart; i++){
2428       row = i + rstart;
2429       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2430       ncols  = nzA + nzB;
2431       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2432       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2433       /* load the column indices for this row into cols */
2434       lwrite = 0;
2435       for (l=0; l<nzB; l++) {
2436         if ((ctmp = bmap[cworkB[l]]) < cstart){
2437           vals[lwrite]   = aworkB[l];
2438           cols[lwrite++] = ctmp;
2439         }
2440       }
2441       for (l=0; l<nzA; l++){
2442         vals[lwrite]   = aworkA[l];
2443         cols[lwrite++] = cstart + cworkA[l];
2444       }
2445       for (l=0; l<nzB; l++) {
2446         if ((ctmp = bmap[cworkB[l]]) >= cend){
2447           vals[lwrite]   = aworkB[l];
2448           cols[lwrite++] = ctmp;
2449         }
2450       }
2451       vals += ncols;
2452       cols += ncols;
2453       rptr[i+1] = rptr[i] + ncols;
2454       if (rownz_max < ncols) rownz_max = ncols;
2455     }
2456     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2457   } else { /* only copy matrix values into sbuf_a */
2458     rptr = sbuf_j;
2459     vals = sbuf_a;
2460     rptr[0] = 0;
2461     for (i=0; i<rend-rstart; i++){
2462       row = i + rstart;
2463       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2464       ncols  = nzA + nzB;
2465       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2466       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2467       lwrite = 0;
2468       for (l=0; l<nzB; l++) {
2469         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2470       }
2471       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2472       for (l=0; l<nzB; l++) {
2473         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2474       }
2475       vals += ncols;
2476       rptr[i+1] = rptr[i] + ncols;
2477     }
2478   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2479 
2480   /* send nzlocal to others, and recv other's nzlocal */
2481   /*--------------------------------------------------*/
2482   if (reuse == MAT_INITIAL_MATRIX){
2483     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2484     s_waits2 = s_waits3 + nsends;
2485     s_waits1 = s_waits2 + nsends;
2486     r_waits1 = s_waits1 + nsends;
2487     r_waits2 = r_waits1 + nrecvs;
2488     r_waits3 = r_waits2 + nrecvs;
2489   } else {
2490     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2491     r_waits3 = s_waits3 + nsends;
2492   }
2493 
2494   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2495   if (reuse == MAT_INITIAL_MATRIX){
2496     /* get new tags to keep the communication clean */
2497     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2498     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2499     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2500 
2501     /* post receives of other's nzlocal */
2502     for (i=0; i<nrecvs; i++){
2503       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2504     }
2505     /* send nzlocal to others */
2506     for (i=0; i<nsends; i++){
2507       sbuf_nz[i] = nzlocal;
2508       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2509     }
2510     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2511     count = nrecvs;
2512     while (count) {
2513       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2514       recv_rank[imdex] = recv_status.MPI_SOURCE;
2515       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2516       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2517 
2518       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2519       rbuf_nz[imdex] += i + 2;
2520       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2521       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2522       count--;
2523     }
2524     /* wait on sends of nzlocal */
2525     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2526     /* send mat->i,j to others, and recv from other's */
2527     /*------------------------------------------------*/
2528     for (i=0; i<nsends; i++){
2529       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2530       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2531     }
2532     /* wait on receives of mat->i,j */
2533     /*------------------------------*/
2534     count = nrecvs;
2535     while (count) {
2536       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2537       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2538       count--;
2539     }
2540     /* wait on sends of mat->i,j */
2541     /*---------------------------*/
2542     if (nsends) {
2543       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2544     }
2545   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2546 
2547   /* post receives, send and receive mat->a */
2548   /*----------------------------------------*/
2549   for (imdex=0; imdex<nrecvs; imdex++) {
2550     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2551   }
2552   for (i=0; i<nsends; i++){
2553     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2554   }
2555   count = nrecvs;
2556   while (count) {
2557     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2558     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2559     count--;
2560   }
2561   if (nsends) {
2562     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2563   }
2564 
2565   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2566 
2567   /* create redundant matrix */
2568   /*-------------------------*/
2569   if (reuse == MAT_INITIAL_MATRIX){
2570     /* compute rownz_max for preallocation */
2571     for (imdex=0; imdex<nrecvs; imdex++){
2572       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2573       rptr = rbuf_j[imdex];
2574       for (i=0; i<j; i++){
2575         ncols = rptr[i+1] - rptr[i];
2576         if (rownz_max < ncols) rownz_max = ncols;
2577       }
2578     }
2579 
2580     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2581     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2582     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2583     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2584     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2585   } else {
2586     C = *matredundant;
2587   }
2588 
2589   /* insert local matrix entries */
2590   rptr = sbuf_j;
2591   cols = sbuf_j + rend-rstart + 1;
2592   vals = sbuf_a;
2593   for (i=0; i<rend-rstart; i++){
2594     row   = i + rstart;
2595     ncols = rptr[i+1] - rptr[i];
2596     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2597     vals += ncols;
2598     cols += ncols;
2599   }
2600   /* insert received matrix entries */
2601   for (imdex=0; imdex<nrecvs; imdex++){
2602     rstart = rowrange[recv_rank[imdex]];
2603     rend   = rowrange[recv_rank[imdex]+1];
2604     rptr = rbuf_j[imdex];
2605     cols = rbuf_j[imdex] + rend-rstart + 1;
2606     vals = rbuf_a[imdex];
2607     for (i=0; i<rend-rstart; i++){
2608       row   = i + rstart;
2609       ncols = rptr[i+1] - rptr[i];
2610       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2611       vals += ncols;
2612       cols += ncols;
2613     }
2614   }
2615   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2616   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2617   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2618   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2619   if (reuse == MAT_INITIAL_MATRIX){
2620     PetscContainer container;
2621     *matredundant = C;
2622     /* create a supporting struct and attach it to C for reuse */
2623     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2624     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2625     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2626     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2627     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2628 
2629     redund->nzlocal = nzlocal;
2630     redund->nsends  = nsends;
2631     redund->nrecvs  = nrecvs;
2632     redund->send_rank = send_rank;
2633     redund->recv_rank = recv_rank;
2634     redund->sbuf_nz = sbuf_nz;
2635     redund->rbuf_nz = rbuf_nz;
2636     redund->sbuf_j  = sbuf_j;
2637     redund->sbuf_a  = sbuf_a;
2638     redund->rbuf_j  = rbuf_j;
2639     redund->rbuf_a  = rbuf_a;
2640 
2641     redund->MatDestroy = C->ops->destroy;
2642     C->ops->destroy    = MatDestroy_MatRedundant;
2643   }
2644   PetscFunctionReturn(0);
2645 }
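/*
    A minimal sketch of the reuse pattern supported above (the calling
    sequence MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal,reuse,&red)
    is assumed from this release): the MAT_INITIAL_MATRIX call builds the
    send/receive buffers and caches them on the result in the "Mat_Redundant"
    container, so a later MAT_REUSE_MATRIX call only moves numerical values.
*/
#if 0
  ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,PETSC_DECIDE,MAT_INITIAL_MATRIX,&red);CHKERRQ(ierr);
  /* ... update the entries of mat without changing its nonzero pattern ... */
  ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,PETSC_DECIDE,MAT_REUSE_MATRIX,&red);CHKERRQ(ierr);
#endif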
2646 
2647 #undef __FUNCT__
2648 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2649 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2650 {
2651   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2652   PetscErrorCode ierr;
2653   PetscInt       i,*idxb = 0;
2654   PetscScalar    *va,*vb;
2655   Vec            vtmp;
2656 
2657   PetscFunctionBegin;
2658   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2659   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2660   if (idx) {
2661     for (i=0; i<A->rmap->n; i++) {
2662       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2663     }
2664   }
2665 
2666   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2667   if (idx) {
2668     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2669   }
2670   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2671   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2672 
2673   for (i=0; i<A->rmap->n; i++){
2674     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2675       va[i] = vb[i];
2676       if (idx) idx[i] = a->garray[idxb[i]];
2677     }
2678   }
2679 
2680   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2681   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2682   if (idxb) {
2683     ierr = PetscFree(idxb);CHKERRQ(ierr);
2684   }
2685   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2686   PetscFunctionReturn(0);
2687 }
2688 
2689 #undef __FUNCT__
2690 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2691 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2692 {
2693   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2694   PetscErrorCode ierr;
2695   PetscInt       i,*idxb = 0;
2696   PetscScalar    *va,*vb;
2697   Vec            vtmp;
2698 
2699   PetscFunctionBegin;
2700   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2701   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2702   if (idx) {
2703     for (i=0; i<A->cmap->n; i++) {
2704       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2705     }
2706   }
2707 
2708   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2709   if (idx) {
2710     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2711   }
2712   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2713   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2714 
2715   for (i=0; i<A->rmap->n; i++){
2716     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2717       va[i] = vb[i];
2718       if (idx) idx[i] = a->garray[idxb[i]];
2719     }
2720   }
2721 
2722   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2723   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2724   if (idxb) {
2725     ierr = PetscFree(idxb);CHKERRQ(ierr);
2726   }
2727   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2728   PetscFunctionReturn(0);
2729 }
2730 
2731 #undef __FUNCT__
2732 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2733 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2734 {
2735   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2736   PetscInt       n      = A->rmap->n;
2737   PetscInt       cstart = A->cmap->rstart;
2738   PetscInt      *cmap   = mat->garray;
2739   PetscInt      *diagIdx, *offdiagIdx;
2740   Vec            diagV, offdiagV;
2741   PetscScalar   *a, *diagA, *offdiagA;
2742   PetscInt       r;
2743   PetscErrorCode ierr;
2744 
2745   PetscFunctionBegin;
2746   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2747   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2748   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2749   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2750   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2751   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2752   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2753   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2754   for(r = 0; r < n; ++r) {
2755     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2756       a[r]   = diagA[r];
2757       if (idx) idx[r] = cstart + diagIdx[r];
2758     } else {
2759       a[r]   = offdiagA[r];
2760       if (idx) idx[r] = cmap[offdiagIdx[r]];
2761     }
2762   }
2763   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2764   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2765   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2766   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2767   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2768   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2769   PetscFunctionReturn(0);
2770 }
2771 
2772 #undef __FUNCT__
2773 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2774 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2775 {
2776   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2777   PetscInt       n      = A->rmap->n;
2778   PetscInt       cstart = A->cmap->rstart;
2779   PetscInt      *cmap   = mat->garray;
2780   PetscInt      *diagIdx, *offdiagIdx;
2781   Vec            diagV, offdiagV;
2782   PetscScalar   *a, *diagA, *offdiagA;
2783   PetscInt       r;
2784   PetscErrorCode ierr;
2785 
2786   PetscFunctionBegin;
2787   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2788   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2789   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2790   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2791   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2792   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2793   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2794   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2795   for(r = 0; r < n; ++r) {
2796     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2797       a[r]   = diagA[r];
2798       if (idx) idx[r] = cstart + diagIdx[r];
2799     } else {
2800       a[r]   = offdiagA[r];
2801       if (idx) idx[r] = cmap[offdiagIdx[r]];
2802     }
2803   }
2804   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2805   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2806   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2807   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2808   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2809   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2810   PetscFunctionReturn(0);
2811 }
2812 
2813 #undef __FUNCT__
2814 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2815 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2816 {
2817   PetscErrorCode ierr;
2818   Mat            *dummy;
2819 
2820   PetscFunctionBegin;
2821   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2822   *newmat = *dummy;
2823   ierr = PetscFree(dummy);CHKERRQ(ierr);
2824   PetscFunctionReturn(0);
2825 }
2826 
2827 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2828 /* -------------------------------------------------------------------*/
2829 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2830        MatGetRow_MPIAIJ,
2831        MatRestoreRow_MPIAIJ,
2832        MatMult_MPIAIJ,
2833 /* 4*/ MatMultAdd_MPIAIJ,
2834        MatMultTranspose_MPIAIJ,
2835        MatMultTransposeAdd_MPIAIJ,
2836 #ifdef PETSC_HAVE_PBGL
2837        MatSolve_MPIAIJ,
2838 #else
2839        0,
2840 #endif
2841        0,
2842        0,
2843 /*10*/ 0,
2844        0,
2845        0,
2846        MatSOR_MPIAIJ,
2847        MatTranspose_MPIAIJ,
2848 /*15*/ MatGetInfo_MPIAIJ,
2849        MatEqual_MPIAIJ,
2850        MatGetDiagonal_MPIAIJ,
2851        MatDiagonalScale_MPIAIJ,
2852        MatNorm_MPIAIJ,
2853 /*20*/ MatAssemblyBegin_MPIAIJ,
2854        MatAssemblyEnd_MPIAIJ,
2855        MatSetOption_MPIAIJ,
2856        MatZeroEntries_MPIAIJ,
2857 /*24*/ MatZeroRows_MPIAIJ,
2858        0,
2859 #ifdef PETSC_HAVE_PBGL
2860        0,
2861 #else
2862        0,
2863 #endif
2864        0,
2865        0,
2866 /*29*/ MatSetUpPreallocation_MPIAIJ,
2867 #ifdef PETSC_HAVE_PBGL
2868        0,
2869 #else
2870        0,
2871 #endif
2872        0,
2873        0,
2874        0,
2875 /*34*/ MatDuplicate_MPIAIJ,
2876        0,
2877        0,
2878        0,
2879        0,
2880 /*39*/ MatAXPY_MPIAIJ,
2881        MatGetSubMatrices_MPIAIJ,
2882        MatIncreaseOverlap_MPIAIJ,
2883        MatGetValues_MPIAIJ,
2884        MatCopy_MPIAIJ,
2885 /*44*/ MatGetRowMax_MPIAIJ,
2886        MatScale_MPIAIJ,
2887        0,
2888        0,
2889        MatZeroRowsColumns_MPIAIJ,
2890 /*49*/ MatSetBlockSize_MPIAIJ,
2891        0,
2892        0,
2893        0,
2894        0,
2895 /*54*/ MatFDColoringCreate_MPIAIJ,
2896        0,
2897        MatSetUnfactored_MPIAIJ,
2898        MatPermute_MPIAIJ,
2899        0,
2900 /*59*/ MatGetSubMatrix_MPIAIJ,
2901        MatDestroy_MPIAIJ,
2902        MatView_MPIAIJ,
2903        0,
2904        0,
2905 /*64*/ 0,
2906        0,
2907        0,
2908        0,
2909        0,
2910 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2911        MatGetRowMinAbs_MPIAIJ,
2912        0,
2913        MatSetColoring_MPIAIJ,
2914 #if defined(PETSC_HAVE_ADIC)
2915        MatSetValuesAdic_MPIAIJ,
2916 #else
2917        0,
2918 #endif
2919        MatSetValuesAdifor_MPIAIJ,
2920 /*75*/ MatFDColoringApply_AIJ,
2921        0,
2922        0,
2923        0,
2924        0,
2925 /*80*/ 0,
2926        0,
2927        0,
2928 /*83*/ MatLoad_MPIAIJ,
2929        0,
2930        0,
2931        0,
2932        0,
2933        0,
2934 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2935        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2936        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2937        MatPtAP_Basic,
2938        MatPtAPSymbolic_MPIAIJ,
2939 /*94*/ MatPtAPNumeric_MPIAIJ,
2940        0,
2941        0,
2942        0,
2943        0,
2944 /*99*/ 0,
2945        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2946        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2947        MatConjugate_MPIAIJ,
2948        0,
2949 /*104*/MatSetValuesRow_MPIAIJ,
2950        MatRealPart_MPIAIJ,
2951        MatImaginaryPart_MPIAIJ,
2952        0,
2953        0,
2954 /*109*/0,
2955        MatGetRedundantMatrix_MPIAIJ,
2956        MatGetRowMin_MPIAIJ,
2957        0,
2958        0,
2959 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2960        0,
2961        0,
2962        0,
2963        0,
2964 /*119*/0,
2965        0,
2966        0,
2967        0,
2968        MatGetMultiProcBlock_MPIAIJ,
2969 /*124*/0,
2970        0,
2971        0,
2972        0,
2973        MatGetSubMatricesParallel_MPIAIJ
2974 };
2975 
2976 /* ----------------------------------------------------------------------------------------*/
2977 
2978 EXTERN_C_BEGIN
2979 #undef __FUNCT__
2980 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2981 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2982 {
2983   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2984   PetscErrorCode ierr;
2985 
2986   PetscFunctionBegin;
2987   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2988   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2989   PetscFunctionReturn(0);
2990 }
2991 EXTERN_C_END
2992 
2993 EXTERN_C_BEGIN
2994 #undef __FUNCT__
2995 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2996 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2997 {
2998   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2999   PetscErrorCode ierr;
3000 
3001   PetscFunctionBegin;
3002   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3003   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3004   PetscFunctionReturn(0);
3005 }
3006 EXTERN_C_END
3007 
3008 EXTERN_C_BEGIN
3009 #undef __FUNCT__
3010 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3011 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3012 {
3013   Mat_MPIAIJ     *b;
3014   PetscErrorCode ierr;
3015   PetscInt       i;
3016 
3017   PetscFunctionBegin;
3018   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3019   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3020   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3021   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3022 
3023   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3024   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3025   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3026   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3027   if (d_nnz) {
3028     for (i=0; i<B->rmap->n; i++) {
3029       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3030     }
3031   }
3032   if (o_nnz) {
3033     for (i=0; i<B->rmap->n; i++) {
3034       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3035     }
3036   }
3037   b = (Mat_MPIAIJ*)B->data;
3038 
3039   if (!B->preallocated) {
3040     /* Explicitly create 2 MATSEQAIJ matrices. */
3041     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3042     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3043     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3044     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3045     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3046     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3047     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3048     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3049   }
3050 
3051   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3052   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3053   B->preallocated = PETSC_TRUE;
3054   PetscFunctionReturn(0);
3055 }
3056 EXTERN_C_END
3057 
3058 #undef __FUNCT__
3059 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3060 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3061 {
3062   Mat            mat;
3063   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3064   PetscErrorCode ierr;
3065 
3066   PetscFunctionBegin;
3067   *newmat       = 0;
3068   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3069   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3070   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3071   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3072   a    = (Mat_MPIAIJ*)mat->data;
3073 
3074   mat->factortype    = matin->factortype;
3075   mat->rmap->bs      = matin->rmap->bs;
3076   mat->assembled    = PETSC_TRUE;
3077   mat->insertmode   = NOT_SET_VALUES;
3078   mat->preallocated = PETSC_TRUE;
3079 
3080   a->size           = oldmat->size;
3081   a->rank           = oldmat->rank;
3082   a->donotstash     = oldmat->donotstash;
3083   a->roworiented    = oldmat->roworiented;
3084   a->rowindices     = 0;
3085   a->rowvalues      = 0;
3086   a->getrowactive   = PETSC_FALSE;
3087 
3088   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3089   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3090 
3091   if (oldmat->colmap) {
3092 #if defined (PETSC_USE_CTABLE)
3093     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3094 #else
3095     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3096     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3097     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3098 #endif
3099   } else a->colmap = 0;
3100   if (oldmat->garray) {
3101     PetscInt len;
3102     len  = oldmat->B->cmap->n;
3103     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3104     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3105     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3106   } else a->garray = 0;
3107 
3108   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3109   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3110   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3111   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3112   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3113   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3114   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3115   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3116   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3117   *newmat = mat;
3118   PetscFunctionReturn(0);
3119 }
3120 
3121 /*
3122     Allows sending/receiving messages larger than 2 gigabytes in a single MPI call
3123 */
3124 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3125 {
3126   int             ierr;
3127   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3128   PetscInt        i,numchunks;
3129   PetscMPIInt     icnt;
3130 
3131   numchunks = cnt/CHUNKSIZE + 1;
3132   for (i=0; i<numchunks; i++) {
3133     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3134     ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
3135     if (type == MPIU_INT) {
3136       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3137     } else if (type == MPIU_SCALAR) {
3138       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3139     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3140   }
3141   return 0;
3142 }
3143 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3144 {
3145   int             ierr;
3146   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3147   MPI_Status      status;
3148   PetscInt        i,numchunks;
3149   PetscMPIInt     icnt;
3150 
3151   numchunks = cnt/CHUNKSIZE + 1;
3152   for (i=0; i<numchunks; i++) {
3153     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3154     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
3155     if (type == MPIU_INT) {
3156       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3157     } else if (type == MPIU_SCALAR) {
3158       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3159     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3160   }
3161   return 0;
3162 }
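/*
    A hypothetical driver for the two helpers above (not part of PETSc):
    rank 0 streams a large array to rank 1 in CHUNKSIZE pieces so the int
    count argument of the underlying MPI calls never overflows. Sender and
    receiver must agree on cnt, type, and tag, since the chunking arithmetic
    is recomputed identically on both sides.
*/
#if 0
static int ExampleLongTransfer(MPI_Comm comm,PetscScalar *big,PetscInt n,int tag)
{
  PetscMPIInt rank;
  int         ierr;

  ierr = MPI_Comm_rank(comm,&rank);if (ierr) return ierr;
  if (rank == 0) {        /* sender: the n scalars are chunked internally */
    ierr = MPILong_Send(big,n,MPIU_SCALAR,1,tag,comm);if (ierr) return ierr;
  } else if (rank == 1) { /* receiver: must pass the same n, type, and tag */
    ierr = MPILong_Recv(big,n,MPIU_SCALAR,0,tag,comm);if (ierr) return ierr;
  }
  return 0;
}
#endif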
3163 
3164 #undef __FUNCT__
3165 #define __FUNCT__ "MatLoad_MPIAIJ"
3166 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3167 {
3168   PetscScalar    *vals,*svals;
3169   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3170   PetscErrorCode ierr;
3171   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3172   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3173   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3174   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3175   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3176   int            fd;
3177 
3178   PetscFunctionBegin;
3179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3180   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3181   if (!rank) {
3182     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3183     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3184     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3185   }
3186 
3187   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3188 
3189   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3190   M = header[1]; N = header[2];
3191   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3192   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3193   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3194 
3195   /* If global sizes are set, check if they are consistent with that given in the file */
3196   if (sizesset) {
3197     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3198   }
3199   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3200   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3201 
3202   /* determine ownership of all rows */
3203   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3204   else m = newMat->rmap->n; /* Set by user */
3205 
3206   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3207   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3208 
3209   /* First process needs enough room for process with most rows */
3210   if (!rank) {
3211     mmax       = rowners[1];
3212     for (i=2; i<=size; i++) {
3213       mmax = PetscMax(mmax,rowners[i]);
3214     }
3215   } else mmax = m;
3216 
3217   rowners[0] = 0;
3218   for (i=2; i<=size; i++) {
3219     rowners[i] += rowners[i-1];
3220   }
3221   rstart = rowners[rank];
3222   rend   = rowners[rank+1];
3223 
3224   /* distribute row lengths to all processors */
3225   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3226   if (!rank) {
3227     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3228     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3229     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3230     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3231     for (j=0; j<m; j++) {
3232       procsnz[0] += ourlens[j];
3233     }
3234     for (i=1; i<size; i++) {
3235       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3236       /* calculate the number of nonzeros on each processor */
3237       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3238         procsnz[i] += rowlengths[j];
3239       }
3240       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3241     }
3242     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3243   } else {
3244     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3245   }
3246 
3247   if (!rank) {
3248     /* determine max buffer needed and allocate it */
3249     maxnz = 0;
3250     for (i=0; i<size; i++) {
3251       maxnz = PetscMax(maxnz,procsnz[i]);
3252     }
3253     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3254 
3255     /* read in my part of the matrix column indices  */
3256     nz   = procsnz[0];
3257     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3258     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3259 
3260     /* read in everyone else's and ship it off */
3261     for (i=1; i<size; i++) {
3262       nz     = procsnz[i];
3263       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3264       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3265     }
3266     ierr = PetscFree(cols);CHKERRQ(ierr);
3267   } else {
3268     /* determine buffer space needed for message */
3269     nz = 0;
3270     for (i=0; i<m; i++) {
3271       nz += ourlens[i];
3272     }
3273     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3274 
3275     /* receive message of column indices */
3276     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3277   }
3278 
3279   /* determine column ownership if matrix is not square */
3280   if (N != M) {
3281     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3282     else n = newMat->cmap->n;
3283     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3284     cstart = cend - n;
3285   } else {
3286     cstart = rstart;
3287     cend   = rend;
3288     n      = cend - cstart;
3289   }
3290 
3291   /* loop over local rows, determining number of off diagonal entries */
3292   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3293   jj = 0;
3294   for (i=0; i<m; i++) {
3295     for (j=0; j<ourlens[i]; j++) {
3296       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3297       jj++;
3298     }
3299   }
3300 
3301   for (i=0; i<m; i++) {
3302     ourlens[i] -= offlens[i];
3303   }
3304   if (!sizesset) {
3305     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3306   }
3307   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3308 
3309   for (i=0; i<m; i++) {
3310     ourlens[i] += offlens[i];
3311   }
3312 
3313   if (!rank) {
3314     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3315 
3316     /* read in my part of the matrix numerical values  */
3317     nz   = procsnz[0];
3318     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3319 
3320     /* insert into matrix */
3321     jj      = rstart;
3322     smycols = mycols;
3323     svals   = vals;
3324     for (i=0; i<m; i++) {
3325       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3326       smycols += ourlens[i];
3327       svals   += ourlens[i];
3328       jj++;
3329     }
3330 
3331     /* read in other processors and ship out */
3332     for (i=1; i<size; i++) {
3333       nz     = procsnz[i];
3334       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3335       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3336     }
3337     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3338   } else {
3339     /* receive numeric values */
3340     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3341 
3342     /* receive message of values */
3343     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3344 
3345     /* insert into matrix */
3346     jj      = rstart;
3347     smycols = mycols;
3348     svals   = vals;
3349     for (i=0; i<m; i++) {
3350       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3351       smycols += ourlens[i];
3352       svals   += ourlens[i];
3353       jj++;
3354     }
3355   }
3356   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3357   ierr = PetscFree(vals);CHKERRQ(ierr);
3358   ierr = PetscFree(mycols);CHKERRQ(ierr);
3359   ierr = PetscFree(rowners);CHKERRQ(ierr);
3360 
3361   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3362   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3363   PetscFunctionReturn(0);
3364 }
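/*
    A sketch of driving the loader above through the public API; the file
    name "matrix.dat" and the helper name are hypothetical. The viewer
    supplies the binary file descriptor read on rank 0 and the communicator
    across which the rows are distributed.
*/
#if 0
static PetscErrorCode ExampleLoadMPIAIJ(MPI_Comm comm,Mat *A)
{
  PetscViewer    viewer;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);         /* dispatches to MatLoad_MPIAIJ() */
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif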
3365 
3366 #undef __FUNCT__
3367 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3368 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3369 {
3370   PetscErrorCode ierr;
3371   IS             iscol_local;
3372   PetscInt       csize;
3373 
3374   PetscFunctionBegin;
3375   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3376   if (call == MAT_REUSE_MATRIX) {
3377     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3378     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3379   } else {
3380     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3381   }
3382   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3383   if (call == MAT_INITIAL_MATRIX) {
3384     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3385     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3386   }
3387   PetscFunctionReturn(0);
3388 }
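/*
    A sketch of the calling convention handled above, assuming a square
    matrix and the five-argument MatGetSubMatrix() of this release; the
    helper name is hypothetical. Each process contributes the part of the
    leading M/2 x M/2 principal block that it owns, so the row and column
    index sets are disjoint across processes as required.
*/
#if 0
static PetscErrorCode ExampleGetLeadingBlock(Mat mat,Mat *sub)
{
  IS             is;
  PetscInt       rstart,rend,M,nloc;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(mat,&M,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  nloc = PetscMin(rend,M/2) - rstart;   /* owned rows that fall in [0,M/2) */
  if (nloc < 0) nloc = 0;
  ierr = ISCreateStride(((PetscObject)mat)->comm,nloc,rstart,1,&is);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(mat,is,is,MAT_INITIAL_MATRIX,sub);CHKERRQ(ierr);
  ierr = ISDestroy(is);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif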
3389 
3390 #undef __FUNCT__
3391 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3392 /*
3393     Not great since it makes two copies of the submatrix: first a local
3394   SeqAIJ, and then the final result by concatenating the local matrices.
3395   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3396 
3397   Note: This requires a sequential iscol with all indices.
3398 */
3399 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3400 {
3401   PetscErrorCode ierr;
3402   PetscMPIInt    rank,size;
3403   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3404   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3405   Mat            *local,M,Mreuse;
3406   MatScalar      *vwork,*aa;
3407   MPI_Comm       comm = ((PetscObject)mat)->comm;
3408   Mat_SeqAIJ     *aij;
3409 
3410 
3411   PetscFunctionBegin;
3412   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3413   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3414 
3415   if (call ==  MAT_REUSE_MATRIX) {
3416     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3417     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3418     local = &Mreuse;
3419     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3420   } else {
3421     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3422     Mreuse = *local;
3423     ierr   = PetscFree(local);CHKERRQ(ierr);
3424   }
3425 
3426   /*
3427       m - number of local rows
3428       n - number of columns (same on all processors)
3429       rstart - first row in new global matrix generated
3430   */
3431   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3432   if (call == MAT_INITIAL_MATRIX) {
3433     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3434     ii  = aij->i;
3435     jj  = aij->j;
3436 
3437     /*
3438         Determine the number of non-zeros in the diagonal and off-diagonal
3439         portions of the matrix in order to do correct preallocation
3440     */
3441 
3442     /* first get start and end of "diagonal" columns */
3443     if (csize == PETSC_DECIDE) {
3444       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3445       if (mglobal == n) { /* square matrix */
3446 	nlocal = m;
3447       } else {
3448         nlocal = n/size + ((n % size) > rank);
3449       }
3450     } else {
3451       nlocal = csize;
3452     }
3453     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3454     rstart = rend - nlocal;
3455     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3456 
3457     /* next, compute all the lengths */
3458     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3459     olens = dlens + m;
3460     for (i=0; i<m; i++) {
3461       jend = ii[i+1] - ii[i];
3462       olen = 0;
3463       dlen = 0;
3464       for (j=0; j<jend; j++) {
3465         if (*jj < rstart || *jj >= rend) olen++;
3466         else dlen++;
3467         jj++;
3468       }
3469       olens[i] = olen;
3470       dlens[i] = dlen;
3471     }
3472     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3473     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3474     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3475     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3476     ierr = PetscFree(dlens);CHKERRQ(ierr);
3477   } else {
3478     PetscInt ml,nl;
3479 
3480     M = *newmat;
3481     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3482     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3483     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3484     /*
3485          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3486        rather than the slower MatSetValues().
3487     */
3488     M->was_assembled = PETSC_TRUE;
3489     M->assembled     = PETSC_FALSE;
3490   }
3491   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3492   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3493   ii  = aij->i;
3494   jj  = aij->j;
3495   aa  = aij->a;
3496   for (i=0; i<m; i++) {
3497     row   = rstart + i;
3498     nz    = ii[i+1] - ii[i];
3499     cwork = jj;     jj += nz;
3500     vwork = aa;     aa += nz;
3501     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3502   }
3503 
3504   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3505   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3506   *newmat = M;
3507 
3508   /* save submatrix used in processor for next request */
3509   if (call ==  MAT_INITIAL_MATRIX) {
3510     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3511     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3512   }
3513 
3514   PetscFunctionReturn(0);
3515 }
3516 
3517 EXTERN_C_BEGIN
3518 #undef __FUNCT__
3519 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3520 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3521 {
3522   PetscInt       m,cstart, cend,j,nnz,i,d;
3523   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3524   const PetscInt *JJ;
3525   PetscScalar    *values;
3526   PetscErrorCode ierr;
3527 
3528   PetscFunctionBegin;
3529   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3530 
3531   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3532   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3533   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3534   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3535   m      = B->rmap->n;
3536   cstart = B->cmap->rstart;
3537   cend   = B->cmap->rend;
3538   rstart = B->rmap->rstart;
3539 
3540   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3541 
3542 #if defined(PETSC_USE_DEBUG)
3543   for (i=0; i<m; i++) {
3544     nnz     = Ii[i+1]- Ii[i];
3545     JJ      = J + Ii[i];
3546     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3547     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3548     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3549   }
3550 #endif
3551 
3552   for (i=0; i<m; i++) {
3553     nnz     = Ii[i+1]- Ii[i];
3554     JJ      = J + Ii[i];
3555     nnz_max = PetscMax(nnz_max,nnz);
3556     d       = 0;
3557     for (j=0; j<nnz; j++) {
3558       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3559     }
3560     d_nnz[i] = d;
3561     o_nnz[i] = nnz - d;
3562   }
3563   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3564   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3565 
3566   if (v) values = (PetscScalar*)v;
3567   else {
3568     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3569     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3570   }
3571 
3572   for (i=0; i<m; i++) {
3573     ii   = i + rstart;
3574     nnz  = Ii[i+1]- Ii[i];
3575     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3576   }
3577   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3578   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3579 
3580   if (!v) {
3581     ierr = PetscFree(values);CHKERRQ(ierr);
3582   }
3583   PetscFunctionReturn(0);
3584 }
3585 EXTERN_C_END
3586 
3587 #undef __FUNCT__
3588 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3589 /*@
3590    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3591    (the default parallel PETSc format).
3592 
3593    Collective on MPI_Comm
3594 
3595    Input Parameters:
3596 +  B - the matrix
3597 .  i - the indices into j for the start of each local row (starts with zero)
3598 .  j - the column indices for each local row (starts with zero)
3599 -  v - optional values in the matrix
3600 
3601    Level: developer
3602 
3603    Notes:
3604        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3605      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3606      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3607 
3608        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3609 
3610        The format used for the sparse matrix input is equivalent to a
3611     row-major ordering, i.e. for the following matrix the expected input
3612     data is as shown:
3613 
3614         1 0 0
3615         2 0 3     P0
3616        -------
3617         4 5 6     P1
3618 
3619      Process0 [P0]: rows_owned=[0,1]
3620         i =  {0,1,3}  [size = nrow+1  = 2+1]
3621         j =  {0,0,2}  [size = nz = 6]
3622         v =  {1,2,3}  [size = nz = 6]
3623 
3624      Process1 [P1]: rows_owned=[2]
3625         i =  {0,3}    [size = nrow+1  = 1+1]
3626         j =  {0,1,2}  [size = nz = 6]
3627         v =  {4,5,6}  [size = nz = 6]
3628 
3629 .keywords: matrix, aij, compressed row, sparse, parallel
3630 
3631 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3632           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3633 @*/
3634 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3635 {
3636   PetscErrorCode ierr;
3637 
3638   PetscFunctionBegin;
3639   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3640   PetscFunctionReturn(0);
3641 }
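/*
    A sketch matching the 3x3 example in the manual page above, run on
    exactly two processes (rank 0 owns rows 0-1, rank 1 owns row 2); the
    helper name is hypothetical, and the CSR triples are local to each
    rank and zero based.
*/
#if 0
static PetscErrorCode ExampleSetCSR(MPI_Comm comm,Mat *B)
{
  const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};  /* rank 0: rows 0 and 1 */
  const PetscScalar v0[] = {1.0,2.0,3.0};
  const PetscInt    i1[] = {0,3},   j1[] = {0,1,2};  /* rank 1: row 2 */
  const PetscScalar v1[] = {4.0,5.0,6.0};
  PetscMPIInt       rank;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(*B,MATMPIAIJ);CHKERRQ(ierr);
  if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(*B,i0,j0,v0);CHKERRQ(ierr);}
  else       {ierr = MatMPIAIJSetPreallocationCSR(*B,i1,j1,v1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
#endif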
3642 
3643 #undef __FUNCT__
3644 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3645 /*@C
3646    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3647    (the default parallel PETSc format).  For good matrix assembly performance
3648    the user should preallocate the matrix storage by setting the parameters
3649    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3650    performance can be increased by more than a factor of 50.
3651 
3652    Collective on MPI_Comm
3653 
3654    Input Parameters:
3655 +  A - the matrix
3656 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3657            (same value is used for all local rows)
3658 .  d_nnz - array containing the number of nonzeros in the various rows of the
3659            DIAGONAL portion of the local submatrix (possibly different for each row)
3660            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3661            The size of this array is equal to the number of local rows, i.e 'm'.
3662            You must leave room for the diagonal entry even if it is zero.
3663 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3664            submatrix (same value is used for all local rows).
3665 -  o_nnz - array containing the number of nonzeros in the various rows of the
3666            OFF-DIAGONAL portion of the local submatrix (possibly different for
3667            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3668            structure. The size of this array is equal to the number
3669            of local rows, i.e 'm'.
3670 
3671    If the *_nnz parameter is given then the *_nz parameter is ignored
3672 
3673    The AIJ format (also called the Yale sparse matrix format or
3674    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3675    storage.  The stored row and column indices begin with zero.
3676    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3677 
3678    The parallel matrix is partitioned such that the first m0 rows belong to
3679    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3680    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3681 
3682    The DIAGONAL portion of the local submatrix of a processor can be defined
3683    as the submatrix which is obtained by extracting the part corresponding to
3684    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3685    first row that belongs to the processor, r2 is the last row belonging to
3686    this processor, and c1-c2 is the range of indices of the local part of a
3687    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3688    common case of a square matrix, the row and column ranges are the same and
3689    the DIAGONAL part is also square. The remaining portion of the local
3690    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3691 
3692    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3693 
3694    You can call MatGetInfo() to get information on how effective the preallocation was;
3695    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3696    You can also run with the option -info and look for messages with the string
3697    malloc in them to see if additional memory allocation was needed.
3698 
3699    Example usage:
3700 
3701    Consider the following 8x8 matrix with 34 non-zero values, that is
3702    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3703    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3704    as follows:
3705 
3706 .vb
3707             1  2  0  |  0  3  0  |  0  4
3708     Proc0   0  5  6  |  7  0  0  |  8  0
3709             9  0 10  | 11  0  0  | 12  0
3710     -------------------------------------
3711            13  0 14  | 15 16 17  |  0  0
3712     Proc1   0 18  0  | 19 20 21  |  0  0
3713             0  0  0  | 22 23  0  | 24  0
3714     -------------------------------------
3715     Proc2  25 26 27  |  0  0 28  | 29  0
3716            30  0  0  | 31 32 33  |  0 34
3717 .ve
3718 
3719    This can be represented as a collection of submatrices as:
3720 
3721 .vb
3722       A B C
3723       D E F
3724       G H I
3725 .ve
3726 
3727    Where the submatrices A,B,C are owned by proc0, D,E,F are
3728    owned by proc1, G,H,I are owned by proc2.
3729 
3730    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3731    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3732    The 'M','N' parameters are 8,8, and have the same values on all procs.
3733 
3734    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3735    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3736    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3737    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3738    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3739    matrix, and [DF] as another SeqAIJ matrix.
3740 
3741    When d_nz, o_nz parameters are specified, d_nz storage elements are
3742    allocated for every row of the local diagonal submatrix, and o_nz
3743    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3744    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3745    per local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3746    In this case, the values of d_nz,o_nz are:
3747 .vb
3748      proc0 : dnz = 2, o_nz = 2
3749      proc1 : dnz = 3, o_nz = 2
3750      proc2 : dnz = 1, o_nz = 4
3751 .ve
3752    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3753    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3754    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3755    34 values.
3756 
3757    When d_nnz, o_nnz parameters are specified, the storage is specified
3758    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3759    In the above case the values for d_nnz,o_nnz are:
3760 .vb
3761      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3762      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3763      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3764 .ve
3765    Here the space allocated is the sum of all the above values, i.e. 34, and
3766    hence the preallocation is perfect.
3767 
3768    Level: intermediate
3769 
3770 .keywords: matrix, aij, compressed row, sparse, parallel
3771 
3772 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3773           MPIAIJ, MatGetInfo()
3774 @*/
3775 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3776 {
3777   PetscErrorCode ierr;
3778 
3779   PetscFunctionBegin;
3780   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3781   PetscFunctionReturn(0);
3782 }
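/*
    A sketch of per-row preallocation for the 8x8 example in the manual page
    above, run on exactly three processes; the d_nnz/o_nnz arrays are the
    ones listed there, so assembly should trigger no additional mallocs
    (verifiable with MatGetInfo() or -info). The helper name is hypothetical.
*/
#if 0
static PetscErrorCode ExamplePreallocate(MPI_Comm comm,Mat *A)
{
  const PetscInt d0[] = {2,2,2}, o0[] = {2,2,2};  /* proc0: rows 0-2 */
  const PetscInt d1[] = {3,3,2}, o1[] = {2,1,1};  /* proc1: rows 3-5 */
  const PetscInt d2[] = {1,1},   o2[] = {4,4};    /* proc2: rows 6-7 */
  PetscMPIInt    rank;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,rank == 2 ? 2 : 3,PETSC_DECIDE,8,8);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  if (rank == 0)      {ierr = MatMPIAIJSetPreallocation(*A,0,d0,0,o0);CHKERRQ(ierr);}
  else if (rank == 1) {ierr = MatMPIAIJSetPreallocation(*A,0,d1,0,o1);CHKERRQ(ierr);}
  else                {ierr = MatMPIAIJSetPreallocation(*A,0,d2,0,o2);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
#endif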
3783 
3784 #undef __FUNCT__
3785 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3786 /*@
3787      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3788          CSR format the local rows.
3789 
3790    Collective on MPI_Comm
3791 
3792    Input Parameters:
3793 +  comm - MPI communicator
3794 .  m - number of local rows (Cannot be PETSC_DECIDE)
3795 .  n - This value should be the same as the local size used in creating the
3796        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3797        calculated if N is given) For square matrices n is almost always m.
3798 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3799 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3800 .   i - row indices
3801 .   j - column indices
3802 -   a - matrix values
3803 
3804    Output Parameter:
3805 .   mat - the matrix
3806 
3807    Level: intermediate
3808 
3809    Notes:
3810        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3811      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3812      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3813 
3814        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3815 
3816        The format used for the sparse matrix input is equivalent to a
3817     row-major ordering, i.e. for the following matrix the expected input
3818     data is as shown:
3819 
3820         1 0 0
3821         2 0 3     P0
3822        -------
3823         4 5 6     P1
3824 
3825      Process0 [P0]: rows_owned=[0,1]
3826         i =  {0,1,3}  [size = nrow+1  = 2+1]
3827         j =  {0,0,2}  [size = nz = 3]
3828         v =  {1,2,3}  [size = nz = 3]
3829 
3830      Process1 [P1]: rows_owned=[2]
3831         i =  {0,3}    [size = nrow+1  = 1+1]
3832         j =  {0,1,2}  [size = nz = 3]
3833         v =  {4,5,6}  [size = nz = 3]
3834 
3835 .keywords: matrix, aij, compressed row, sparse, parallel
3836 
3837 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3838           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3839 @*/
3840 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3841 {
3842   PetscErrorCode ierr;
3843 
3844   PetscFunctionBegin;
3845   if (i[0]) {
3846     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3847   }
3848   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3849   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3850   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3851   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3852   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3853   PetscFunctionReturn(0);
3854 }
3855 
3856 #undef __FUNCT__
3857 #define __FUNCT__ "MatCreateMPIAIJ"
3858 /*@C
3859    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3860    (the default parallel PETSc format).  For good matrix assembly performance
3861    the user should preallocate the matrix storage by setting the parameters
3862    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3863    performance can be increased by more than a factor of 50.
3864 
3865    Collective on MPI_Comm
3866 
3867    Input Parameters:
3868 +  comm - MPI communicator
3869 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3870            This value should be the same as the local size used in creating the
3871            y vector for the matrix-vector product y = Ax.
3872 .  n - This value should be the same as the local size used in creating the
3873        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3874        calculated if N is given) For square matrices n is almost always m.
3875 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3876 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3877 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3878            (same value is used for all local rows)
3879 .  d_nnz - array containing the number of nonzeros in the various rows of the
3880            DIAGONAL portion of the local submatrix (possibly different for each row)
3881            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3882            The size of this array is equal to the number of local rows, i.e 'm'.
3883            You must leave room for the diagonal entry even if it is zero.
3884 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3885            submatrix (same value is used for all local rows).
3886 -  o_nnz - array containing the number of nonzeros in the various rows of the
3887            OFF-DIAGONAL portion of the local submatrix (possibly different for
3888            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3889            structure. The size of this array is equal to the number
3890            of local rows, i.e 'm'.
3891 
3892    Output Parameter:
3893 .  A - the matrix
3894 
3895    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3896    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3897    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3898 
3899    Notes:
3900    If the *_nnz parameter is given then the *_nz parameter is ignored
3901 
3902    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3903    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3904    storage requirements for this matrix.
3905 
3906    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3907    processor then it must be used on all processors that share the object for
3908    that argument.
3909 
3910    The user MUST specify either the local or global matrix dimensions
3911    (possibly both).
3912 
3913    The parallel matrix is partitioned across processors such that the
3914    first m0 rows belong to process 0, the next m1 rows belong to
3915    process 1, the next m2 rows belong to process 2 etc.. where
3916    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3917    values corresponding to an [m x N] submatrix.
3918 
3919    The columns are logically partitioned with the n0 columns belonging
3920    to 0th partition, the next n1 columns belonging to the next
3921    partition etc., where n0,n1,n2... are the input parameter 'n'.
3922 
3923    The DIAGONAL portion of the local submatrix on any given processor
3924    is the submatrix corresponding to the rows and columns m,n
3925    owned by the given processor, i.e. the diagonal matrix on
3926    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
3927    etc. The remaining portion of the local submatrix [m x (N-n)]
3928    constitute the OFF-DIAGONAL portion. The example below better
3929    illustrates this concept.
3930 
3931    For a square global matrix we define each processor's diagonal portion
3932    to be its local rows and the corresponding columns (a square submatrix);
3933    each processor's off-diagonal portion encompasses the remainder of the
3934    local matrix (a rectangular submatrix).
3935 
3936    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3937 
3938    When calling this routine with a single process communicator, a matrix of
3939    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3940    type of communicator, use the construction mechanism:
3941      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3942 
3943    By default, this format uses inodes (identical nodes) when possible.
3944    We search for consecutive rows with the same nonzero structure, thereby
3945    reusing matrix information to achieve increased efficiency.
3946 
3947    Options Database Keys:
3948 +  -mat_no_inode  - Do not use inodes
3949 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3950 -  -mat_aij_oneindex - Internally use indexing starting at 1
3951         rather than 0.  Note that when calling MatSetValues(),
3952         the user still MUST index entries starting at 0!
3953 
3954 
3955    Example usage:
3956 
3957    Consider the following 8x8 matrix with 34 non-zero values, that is
3958    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3959    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3960    as follows:
3961 
3962 .vb
3963             1  2  0  |  0  3  0  |  0  4
3964     Proc0   0  5  6  |  7  0  0  |  8  0
3965             9  0 10  | 11  0  0  | 12  0
3966     -------------------------------------
3967            13  0 14  | 15 16 17  |  0  0
3968     Proc1   0 18  0  | 19 20 21  |  0  0
3969             0  0  0  | 22 23  0  | 24  0
3970     -------------------------------------
3971     Proc2  25 26 27  |  0  0 28  | 29  0
3972            30  0  0  | 31 32 33  |  0 34
3973 .ve
3974 
3975    This can be represented as a collection of submatrices as:
3976 
3977 .vb
3978       A B C
3979       D E F
3980       G H I
3981 .ve
3982 
3983    Where the submatrices A,B,C are owned by proc0, D,E,F are
3984    owned by proc1, G,H,I are owned by proc2.
3985 
3986    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3987    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3988    The 'M','N' parameters are 8,8, and have the same values on all procs.
3989 
3990    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3991    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3992    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3993    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3994    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3995    matrix, and [DF] as another SeqAIJ matrix.
3996 
3997    When d_nz, o_nz parameters are specified, d_nz storage elements are
3998    allocated for every row of the local diagonal submatrix, and o_nz
3999    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4000    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4001    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4002    In this case, the values of d_nz,o_nz are:
4003 .vb
4004      proc0 : d_nz = 2, o_nz = 2
4005      proc1 : d_nz = 3, o_nz = 2
4006      proc2 : d_nz = 1, o_nz = 4
4007 .ve
4008    We are allocating m*(d_nz+o_nz) storage locations on every proc. This
4009    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4010    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4011    34 values.
4012 
4013    When the d_nnz, o_nnz parameters are specified, the storage is specified
4014    for every row, corresponding to the DIAGONAL and OFF-DIAGONAL submatrices.
4015    In the above case the values for d_nnz,o_nnz are:
4016 .vb
4017      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4018      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4019      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4020 .ve
4021    Here the space allocated is the sum of all the above values, i.e. 34, and
4022    hence the pre-allocation is perfect.
4023 
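   A minimal call sketch for proc0 of the example above (each process passes
   its own m and its own d_nnz/o_nnz arrays; the values are the hypothetical
   ones listed above):
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
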
4024    Level: intermediate
4025 
4026 .keywords: matrix, aij, compressed row, sparse, parallel
4027 
4028 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4029           MPIAIJ, MatCreateMPIAIJWithArrays()
4030 @*/
4031 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4032 {
4033   PetscErrorCode ierr;
4034   PetscMPIInt    size;
4035 
4036   PetscFunctionBegin;
4037   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4038   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4039   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4040   if (size > 1) {
4041     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4042     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4043   } else {
4044     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4045     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4046   }
4047   PetscFunctionReturn(0);
4048 }
4049 
4050 #undef __FUNCT__
4051 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
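/*
   MatMPIAIJGetSeqAIJ - gives access to the pieces of an MPIAIJ matrix: Ad is
   the local DIAGONAL block, Ao the local OFF-DIAGONAL block, and colmap maps
   the local column indices of Ao to global column indices.
*/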
4052 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4053 {
4054   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4055 
4056   PetscFunctionBegin;
4057   *Ad     = a->A;
4058   *Ao     = a->B;
4059   *colmap = a->garray;
4060   PetscFunctionReturn(0);
4061 }
4062 
4063 #undef __FUNCT__
4064 #define __FUNCT__ "MatSetColoring_MPIAIJ"
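/*
   Applies a coloring to an MPIAIJ matrix by setting it separately on the
   DIAGONAL block a->A and the OFF-DIAGONAL block a->B. For a global coloring
   the colors are first gathered onto every process; for a ghosted coloring
   they are translated through the column mapping A->cmapping.
*/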
4065 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4066 {
4067   PetscErrorCode ierr;
4068   PetscInt       i;
4069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4070 
4071   PetscFunctionBegin;
4072   if (coloring->ctype == IS_COLORING_GLOBAL) {
4073     ISColoringValue *allcolors,*colors;
4074     ISColoring      ocoloring;
4075 
4076     /* set coloring for diagonal portion */
4077     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4078 
4079     /* set coloring for off-diagonal portion */
4080     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4081     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4082     for (i=0; i<a->B->cmap->n; i++) {
4083       colors[i] = allcolors[a->garray[i]];
4084     }
4085     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4086     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4087     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4088     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4089   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4090     ISColoringValue *colors;
4091     PetscInt        *larray;
4092     ISColoring      ocoloring;
4093 
4094     /* set coloring for diagonal portion */
4095     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4096     for (i=0; i<a->A->cmap->n; i++) {
4097       larray[i] = i + A->cmap->rstart;
4098     }
4099     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4100     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4101     for (i=0; i<a->A->cmap->n; i++) {
4102       colors[i] = coloring->colors[larray[i]];
4103     }
4104     ierr = PetscFree(larray);CHKERRQ(ierr);
4105     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4106     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4107     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4108 
4109     /* set coloring for off-diagonal portion */
4110     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4111     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4112     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4113     for (i=0; i<a->B->cmap->n; i++) {
4114       colors[i] = coloring->colors[larray[i]];
4115     }
4116     ierr = PetscFree(larray);CHKERRQ(ierr);
4117     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4118     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4119     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4120   } else {
4121     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4122   }
4123 
4124   PetscFunctionReturn(0);
4125 }
4126 
4127 #if defined(PETSC_HAVE_ADIC)
4128 #undef __FUNCT__
4129 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4130 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4131 {
4132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4133   PetscErrorCode ierr;
4134 
4135   PetscFunctionBegin;
4136   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4137   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4138   PetscFunctionReturn(0);
4139 }
4140 #endif
4141 
4142 #undef __FUNCT__
4143 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4144 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4145 {
4146   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4147   PetscErrorCode ierr;
4148 
4149   PetscFunctionBegin;
4150   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4151   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4152   PetscFunctionReturn(0);
4153 }
4154 
4155 #undef __FUNCT__
4156 #define __FUNCT__ "MatMerge"
4157 /*@
4158       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
4159                  matrices from each processor
4160 
4161     Collective on MPI_Comm
4162 
4163    Input Parameters:
4164 +    comm - the communicator the parallel matrix will live on
4165 .    inmat - the input sequential matrix (one per process)
4166 .    n - number of local columns (or PETSC_DECIDE)
4167 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4168 
4169    Output Parameter:
4170 .    outmat - the parallel matrix generated
4171 
4172     Level: advanced
4173 
4174    Notes: The number of columns of the matrix on EACH processor MUST be the same.
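
   A minimal usage sketch (assuming each process holds a SeqAIJ matrix seq
   with the same number of columns; note that inmat is destroyed by this
   routine):
.vb
     Mat par;
     ierr = MatMerge(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
.ve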
4175 
4176 @*/
4177 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4178 {
4179   PetscErrorCode ierr;
4180   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4181   PetscInt       *indx;
4182   PetscScalar    *values;
4183 
4184   PetscFunctionBegin;
4185   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4186   if (scall == MAT_INITIAL_MATRIX){
4187     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4188     if (n == PETSC_DECIDE){
4189       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4190     }
4191     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4192     rstart -= m;
4193 
4194     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4195     for (i=0;i<m;i++) {
4196       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4197       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4198       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4199     }
4200     /* This routine will ONLY return an MPIAIJ type matrix */
4201     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4202     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4203     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4204     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4205     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4206 
4207   } else if (scall == MAT_REUSE_MATRIX){
4208     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4209   } else {
4210     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4211   }
4212 
4213   for (i=0;i<m;i++) {
4214     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4215     Ii    = i + rstart;
4216     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4217     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4218   }
4219   ierr = MatDestroy(inmat);CHKERRQ(ierr);
4220   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4221   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4222 
4223   PetscFunctionReturn(0);
4224 }
4225 
4226 #undef __FUNCT__
4227 #define __FUNCT__ "MatFileSplit"
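/*
   MatFileSplit - each process copies its local rows of A into a sequential
   matrix and appends that matrix to the binary file <outfile>.<rank>.
*/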
4228 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4229 {
4230   PetscErrorCode    ierr;
4231   PetscMPIInt       rank;
4232   PetscInt          m,N,i,rstart,nnz;
4233   size_t            len;
4234   const PetscInt    *indx;
4235   PetscViewer       out;
4236   char              *name;
4237   Mat               B;
4238   const PetscScalar *values;
4239 
4240   PetscFunctionBegin;
4241   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4242   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4243   /* Should this be the type of the diagonal block of A? */
4244   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4245   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4246   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4247   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4248   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4249   for (i=0;i<m;i++) {
4250     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4251     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4252     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4253   }
4254   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4255   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4256 
4257   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4258   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4259   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4260   sprintf(name,"%s.%d",outfile,rank);
4261   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4262   ierr = PetscFree(name);CHKERRQ(ierr);
4263   ierr = MatView(B,out);CHKERRQ(ierr);
4264   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4265   ierr = MatDestroy(B);CHKERRQ(ierr);
4266   PetscFunctionReturn(0);
4267 }
4268 
4269 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4270 #undef __FUNCT__
4271 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
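/*
   Destroys an MPIAIJ matrix created by MatMerge_SeqsToMPI(): frees the
   attached Mat_Merge_SeqsToMPI support structure, then performs the usual
   MPIAIJ destruction.
*/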
4272 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4273 {
4274   PetscErrorCode       ierr;
4275   Mat_Merge_SeqsToMPI  *merge=PETSC_NULL;
4276   PetscContainer       container;
4277 
4278   PetscFunctionBegin;
4279   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4280   if (container) {
4281     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4282     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4283     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4284     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4285     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4286     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4287     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4288     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4289     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4290     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4291     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4292     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4293     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4294     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4295 
4296     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4297     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4298   }
4299   ierr = PetscFree(merge);CHKERRQ(ierr);
4300 
4301   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4302   PetscFunctionReturn(0);
4303 }
4304 
4305 #include "../src/mat/utils/freespace.h"
4306 #include "petscbt.h"
4307 
4308 #undef __FUNCT__
4309 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4310 /*@C
4311       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
4312                  matrices from each processor
4313 
4314     Collective on MPI_Comm
4315 
4316    Input Parameters:
4317 +    comm - the communicator the parallel matrix will live on
4318 .    seqmat - the input sequential matrix (one per process)
4319 .    m - number of local rows (or PETSC_DECIDE)
4320 .    n - number of local columns (or PETSC_DECIDE)
4321 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4322 
4323    Output Parameter:
4324 .    mpimat - the parallel matrix generated
4325 
4326     Level: advanced
4327 
4328    Notes:
4329      The dimensions of the sequential matrix on each processor MUST be the same.
4330      The input seqmat is placed in the container "Mat_Merge_SeqsToMPI", and will be
4331      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
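
   A minimal usage sketch (symbolic setup on the first call, numeric fill on
   a later call reusing the same nonzero pattern):
.vb
     Mat mpimat;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... change the numerical values of seqmat, keeping its nonzero pattern ... */
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve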
4332 @*/
4333 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4334 {
4335   PetscErrorCode       ierr;
4336   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4337   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4338   PetscMPIInt          size,rank,taga,*len_s;
4339   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4340   PetscInt             proc,m;
4341   PetscInt             **buf_ri,**buf_rj;
4342   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4343   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4344   MPI_Request          *s_waits,*r_waits;
4345   MPI_Status           *status;
4346   MatScalar            *aa=a->a;
4347   MatScalar            **abuf_r,*ba_i;
4348   Mat_Merge_SeqsToMPI  *merge;
4349   PetscContainer       container;
4350 
4351   PetscFunctionBegin;
4352   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4353 
4354   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4355   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4356 
4357   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4358   if (container) {
4359     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4360   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat does not have an attached MatMergeSeqsToMPI container");
4361   bi     = merge->bi;
4362   bj     = merge->bj;
4363   buf_ri = merge->buf_ri;
4364   buf_rj = merge->buf_rj;
4365 
4366   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4367   owners = merge->rowmap->range;
4368   len_s  = merge->len_s;
4369 
4370   /* send and recv matrix values */
4371   /*-----------------------------*/
4372   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4373   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4374 
4375   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4376   for (proc=0,k=0; proc<size; proc++){
4377     if (!len_s[proc]) continue;
4378     i = owners[proc];
4379     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4380     k++;
4381   }
4382 
4383   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4384   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4385   ierr = PetscFree(status);CHKERRQ(ierr);
4386 
4387   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4388   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4389 
4390   /* insert mat values of mpimat */
4391   /*----------------------------*/
4392   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4393   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4394 
4395   for (k=0; k<merge->nrecv; k++){
4396     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4397     nrows = *(buf_ri_k[k]);
4398     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4399     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4400   }
4401 
4402   /* set values of ba */
4403   m = merge->rowmap->n;
4404   for (i=0; i<m; i++) {
4405     arow = owners[rank] + i;
4406     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4407     bnzi = bi[i+1] - bi[i];
4408     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4409 
4410     /* add local non-zero vals of this proc's seqmat into ba */
4411     anzi = ai[arow+1] - ai[arow];
4412     aj   = a->j + ai[arow];
4413     aa   = a->a + ai[arow];
4414     nextaj = 0;
4415     for (j=0; nextaj<anzi; j++){
4416       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4417         ba_i[j] += aa[nextaj++];
4418       }
4419     }
4420 
4421     /* add received vals into ba */
4422     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4423       /* i-th row */
4424       if (i == *nextrow[k]) {
4425         anzi = *(nextai[k]+1) - *nextai[k];
4426         aj   = buf_rj[k] + *(nextai[k]);
4427         aa   = abuf_r[k] + *(nextai[k]);
4428         nextaj = 0;
4429         for (j=0; nextaj<anzi; j++){
4430           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4431             ba_i[j] += aa[nextaj++];
4432           }
4433         }
4434         nextrow[k]++; nextai[k]++;
4435       }
4436     }
4437     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4438   }
4439   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4440   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4441 
4442   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4443   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4444   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4445   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4446   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4447   PetscFunctionReturn(0);
4448 }
4449 
4450 #undef __FUNCT__
4451 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
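/*
   Symbolic phase of MatMerge_SeqsToMPI(): exchanges the nonzero structure
   of each process's seqmat, builds and preallocates the merged parallel
   matrix B_mpi, and attaches a Mat_Merge_SeqsToMPI container to it so that
   MatMerge_SeqsToMPINumeric() can fill in (and later reuse) the values.
*/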
4452 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4453 {
4454   PetscErrorCode       ierr;
4455   Mat                  B_mpi;
4456   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4457   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4458   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4459   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4460   PetscInt             len,proc,*dnz,*onz;
4461   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4462   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4463   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4464   MPI_Status           *status;
4465   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4466   PetscBT              lnkbt;
4467   Mat_Merge_SeqsToMPI  *merge;
4468   PetscContainer       container;
4469 
4470   PetscFunctionBegin;
4471   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4472 
4473   /* make sure it is a PETSc comm */
4474   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4475   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4476   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4477 
4478   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4479   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4480 
4481   /* determine row ownership */
4482   /*---------------------------------------------------------*/
4483   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4484   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4485   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4486   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4487   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4488   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4489   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4490 
4491   m      = merge->rowmap->n;
4492   M      = merge->rowmap->N;
4493   owners = merge->rowmap->range;
4494 
4495   /* determine the number of messages to send, their lengths */
4496   /*---------------------------------------------------------*/
4497   len_s  = merge->len_s;
4498 
4499   len = 0;  /* length of buf_si[] */
4500   merge->nsend = 0;
4501   for (proc=0; proc<size; proc++){
4502     len_si[proc] = 0;
4503     if (proc == rank){
4504       len_s[proc] = 0;
4505     } else {
4506       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4507       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4508     }
4509     if (len_s[proc]) {
4510       merge->nsend++;
4511       nrows = 0;
4512       for (i=owners[proc]; i<owners[proc+1]; i++){
4513         if (ai[i+1] > ai[i]) nrows++;
4514       }
4515       len_si[proc] = 2*(nrows+1);
4516       len += len_si[proc];
4517     }
4518   }
4519 
4520   /* determine the number and length of messages to receive for ij-structure */
4521   /*-------------------------------------------------------------------------*/
4522   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4523   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4524 
4525   /* post the Irecv of j-structure */
4526   /*-------------------------------*/
4527   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4528   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4529 
4530   /* post the Isend of j-structure */
4531   /*--------------------------------*/
4532   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4533 
4534   for (proc=0, k=0; proc<size; proc++){
4535     if (!len_s[proc]) continue;
4536     i = owners[proc];
4537     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4538     k++;
4539   }
4540 
4541   /* receives and sends of j-structure are complete */
4542   /*------------------------------------------------*/
4543   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4544   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4545 
4546   /* send and recv i-structure */
4547   /*---------------------------*/
4548   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4549   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4550 
4551   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4552   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4553   for (proc=0,k=0; proc<size; proc++){
4554     if (!len_s[proc]) continue;
4555     /* form outgoing message for i-structure:
4556          buf_si[0]:                 nrows to be sent
4557                [1:nrows]:           row index (local to the receiving process)
4558                [nrows+1:2*nrows+1]: i-structure index
4559     */
4560     /*-------------------------------------------*/
4561     nrows = len_si[proc]/2 - 1;
4562     buf_si_i    = buf_si + nrows+1;
4563     buf_si[0]   = nrows;
4564     buf_si_i[0] = 0;
4565     nrows = 0;
4566     for (i=owners[proc]; i<owners[proc+1]; i++){
4567       anzi = ai[i+1] - ai[i];
4568       if (anzi) {
4569         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4570         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4571         nrows++;
4572       }
4573     }
4574     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4575     k++;
4576     buf_si += len_si[proc];
4577   }
4578 
4579   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4580   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4581 
4582   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4583   for (i=0; i<merge->nrecv; i++){
4584     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4585   }
4586 
4587   ierr = PetscFree(len_si);CHKERRQ(ierr);
4588   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4589   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4590   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4591   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4592   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4593   ierr = PetscFree(status);CHKERRQ(ierr);
4594 
4595   /* compute a local seq matrix in each processor */
4596   /*----------------------------------------------*/
4597   /* allocate bi array and free space for accumulating nonzero column info */
4598   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4599   bi[0] = 0;
4600 
4601   /* create and initialize a linked list */
4602   nlnk = N+1;
4603   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4604 
4605   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4606   len  = ai[owners[rank+1]] - ai[owners[rank]];
4608   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4609   current_space = free_space;
4610 
4611   /* determine symbolic info for each local row */
4612   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4613 
4614   for (k=0; k<merge->nrecv; k++){
4615     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4616     nrows = *buf_ri_k[k];
4617     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4618     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4619   }
4620 
4621   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4622   len = 0;
4623   for (i=0;i<m;i++) {
4624     bnzi   = 0;
4625     /* add local non-zero cols of this proc's seqmat into lnk */
4626     arow   = owners[rank] + i;
4627     anzi   = ai[arow+1] - ai[arow];
4628     aj     = a->j + ai[arow];
4629     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4630     bnzi += nlnk;
4631     /* add received col data into lnk */
4632     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4633       if (i == *nextrow[k]) { /* i-th row */
4634         anzi = *(nextai[k]+1) - *nextai[k];
4635         aj   = buf_rj[k] + *nextai[k];
4636         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4637         bnzi += nlnk;
4638         nextrow[k]++; nextai[k]++;
4639       }
4640     }
4641     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4642 
4643     /* if free space is not available, make more free space */
4644     if (current_space->local_remaining<bnzi) {
4645       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4646       nspacedouble++;
4647     }
4648     /* copy data into free space, then initialize lnk */
4649     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4650     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4651 
4652     current_space->array           += bnzi;
4653     current_space->local_used      += bnzi;
4654     current_space->local_remaining -= bnzi;
4655 
4656     bi[i+1] = bi[i] + bnzi;
4657   }
4658 
4659   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4660 
4661   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4662   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4663   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4664 
4665   /* create symbolic parallel matrix B_mpi */
4666   /*---------------------------------------*/
4667   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4668   if (n==PETSC_DECIDE) {
4669     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4670   } else {
4671     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4672   }
4673   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4674   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4675   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4676 
4677   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4678   B_mpi->assembled     = PETSC_FALSE;
4679   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4680   merge->bi            = bi;
4681   merge->bj            = bj;
4682   merge->buf_ri        = buf_ri;
4683   merge->buf_rj        = buf_rj;
4684   merge->coi           = PETSC_NULL;
4685   merge->coj           = PETSC_NULL;
4686   merge->owners_co     = PETSC_NULL;
4687 
4688   /* attach the supporting struct to B_mpi for reuse */
4689   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4690   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4691   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4692   *mpimat = B_mpi;
4693 
4694   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4695   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4696   PetscFunctionReturn(0);
4697 }
4698 
4699 #undef __FUNCT__
4700 #define __FUNCT__ "MatMerge_SeqsToMPI"
4701 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4702 {
4703   PetscErrorCode   ierr;
4704 
4705   PetscFunctionBegin;
4706   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4707   if (scall == MAT_INITIAL_MATRIX){
4708     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4709   }
4710   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4711   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4712   PetscFunctionReturn(0);
4713 }
4714 
4715 #undef __FUNCT__
4716 #define __FUNCT__ "MatGetLocalMat"
4717 /*@
4718      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel matrix
4719 
4720     Not Collective
4721 
4722    Input Parameters:
4723 +    A - the matrix
4724 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4725 
4726    Output Parameter:
4727 .    A_loc - the local sequential matrix generated
4728 
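   A minimal usage sketch (A is a parallel AIJ matrix; pass MAT_REUSE_MATRIX
   on later calls to refill the same A_loc):
.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
.ve
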
4729     Level: developer
4730 
4731 @*/
4732 PetscErrorCode  MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4733 {
4734   PetscErrorCode  ierr;
4735   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4736   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4737   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4738   MatScalar       *aa=a->a,*ba=b->a,*cam;
4739   PetscScalar     *ca;
4740   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4741   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4742 
4743   PetscFunctionBegin;
4744   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4745   if (scall == MAT_INITIAL_MATRIX){
4746     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4747     ci[0] = 0;
4748     for (i=0; i<am; i++){
4749       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4750     }
4751     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4752     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4753     k = 0;
4754     for (i=0; i<am; i++) {
4755       ncols_o = bi[i+1] - bi[i];
4756       ncols_d = ai[i+1] - ai[i];
4757       /* off-diagonal portion of A */
4758       for (jo=0; jo<ncols_o; jo++) {
4759         col = cmap[*bj];
4760         if (col >= cstart) break;
4761         cj[k]   = col; bj++;
4762         ca[k++] = *ba++;
4763       }
4764       /* diagonal portion of A */
4765       for (j=0; j<ncols_d; j++) {
4766         cj[k]   = cstart + *aj++;
4767         ca[k++] = *aa++;
4768       }
4769       /* off-diagonal portion of A */
4770       for (j=jo; j<ncols_o; j++) {
4771         cj[k]   = cmap[*bj++];
4772         ca[k++] = *ba++;
4773       }
4774     }
4775     /* put together the new matrix */
4776     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4777     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4778     /* Since these are PETSc arrays, change flags to free them as necessary. */
4779     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4780     mat->free_a  = PETSC_TRUE;
4781     mat->free_ij = PETSC_TRUE;
4782     mat->nonew   = 0;
4783   } else if (scall == MAT_REUSE_MATRIX){
4784     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4785     ci = mat->i; cj = mat->j; cam = mat->a;
4786     for (i=0; i<am; i++) {
4787       /* off-diagonal portion of A */
4788       ncols_o = bi[i+1] - bi[i];
4789       for (jo=0; jo<ncols_o; jo++) {
4790         col = cmap[*bj];
4791         if (col >= cstart) break;
4792         *cam++ = *ba++; bj++;
4793       }
4794       /* diagonal portion of A */
4795       ncols_d = ai[i+1] - ai[i];
4796       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4797       /* off-diagonal portion of A */
4798       for (j=jo; j<ncols_o; j++) {
4799         *cam++ = *ba++; bj++;
4800       }
4801     }
4802   } else {
4803     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4804   }
4805 
4806   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4807   PetscFunctionReturn(0);
4808 }
4809 
4810 #undef __FUNCT__
4811 #define __FUNCT__ "MatGetLocalMatCondensed"
4812 /*@C
4813      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and the NON-ZERO columns of a parallel matrix
4814 
4815     Not Collective
4816 
4817    Input Parameters:
4818 +    A - the matrix
4819 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4820 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4821 
4822    Output Parameter:
4823 .    A_loc - the local sequential matrix generated
4824 
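   A minimal usage sketch (passing PETSC_NULL for row and col selects the
   local rows and all nonzero columns of this process):
.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
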
4825     Level: developer
4826 
4827 @*/
4828 PetscErrorCode  MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4829 {
4830   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4831   PetscErrorCode    ierr;
4832   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4833   IS                isrowa,iscola;
4834   Mat               *aloc;
4835 
4836   PetscFunctionBegin;
4837   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4838   if (!row){
4839     start = A->rmap->rstart; end = A->rmap->rend;
4840     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4841   } else {
4842     isrowa = *row;
4843   }
4844   if (!col){
4845     start = A->cmap->rstart;
4846     cmap  = a->garray;
4847     nzA   = a->A->cmap->n;
4848     nzB   = a->B->cmap->n;
4849     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4850     ncols = 0;
4851     for (i=0; i<nzB; i++) {
4852       if (cmap[i] < start) idx[ncols++] = cmap[i];
4853       else break;
4854     }
4855     imark = i;
4856     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4857     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4858     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4859   } else {
4860     iscola = *col;
4861   }
4862   if (scall != MAT_INITIAL_MATRIX){
4863     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4864     aloc[0] = *A_loc;
4865   }
4866   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4867   *A_loc = aloc[0];
4868   ierr = PetscFree(aloc);CHKERRQ(ierr);
4869   if (!row){
4870     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4871   }
4872   if (!col){
4873     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4874   }
4875   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4876   PetscFunctionReturn(0);
4877 }
4878 
4879 #undef __FUNCT__
4880 #define __FUNCT__ "MatGetBrowsOfAcols"
4881 /*@C
4882     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4883 
4884     Collective on Mat
4885 
4886    Input Parameters:
4887 +    A,B - the matrices in mpiaij format
4888 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4889 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4890 
4891    Output Parameters:
4892 +    rowb, colb - index sets of rows and columns of B to extract
4893 .    brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows
4894 -    B_seq - the sequential matrix generated
4895 
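   A reuse sketch (the index sets created by the first call are returned in
   rowb/colb and passed back on later calls):
.vb
     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     /* ... change the numerical values of B, keeping its nonzero pattern ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve
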
4896     Level: developer
4897 
4898 @*/
4899 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4900 {
4901   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4902   PetscErrorCode    ierr;
4903   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4904   IS                isrowb,iscolb;
4905   Mat               *bseq;
4906 
4907   PetscFunctionBegin;
4908   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4909     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4910   }
4911   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4912 
4913   if (scall == MAT_INITIAL_MATRIX){
4914     start = A->cmap->rstart;
4915     cmap  = a->garray;
4916     nzA   = a->A->cmap->n;
4917     nzB   = a->B->cmap->n;
4918     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4919     ncols = 0;
4920     for (i=0; i<nzB; i++) {  /* row < local row index */
4921       if (cmap[i] < start) idx[ncols++] = cmap[i];
4922       else break;
4923     }
4924     imark = i;
4925     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4926     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4927     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4928     *brstart = imark;
4929     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4930   } else {
4931     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4932     isrowb = *rowb; iscolb = *colb;
4933     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4934     bseq[0] = *B_seq;
4935   }
4936   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4937   *B_seq = bseq[0];
4938   ierr = PetscFree(bseq);CHKERRQ(ierr);
4939   if (!rowb){
4940     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4941   } else {
4942     *rowb = isrowb;
4943   }
4944   if (!colb){
4945     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4946   } else {
4947     *colb = iscolb;
4948   }
4949   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4950   PetscFunctionReturn(0);
4951 }
4952 
4953 #undef __FUNCT__
4954 #define __FUNCT__ "MatGetBrowsOfAoCols"
4955 /*@C
4956     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4957     of the OFF-DIAGONAL portion of local A
4958 
4959     Collective on Mat
4960 
4961    Input Parameters:
4962 +    A,B - the matrices in mpiaij format
4963 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4964 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4965 .    startsj_r - similar to startsj for receives
4966 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4967 
4968    Output Parameter:
4969 .    B_oth - the sequential matrix generated
4970 
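   A reuse sketch (the j/a communication buffers from the first call are
   returned through startsj, startsj_r, and bufa_ptr and passed back later):
.vb
     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     /* ... change the numerical values of B, keeping its nonzero pattern ... */
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
.ve
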
4971     Level: developer
4972 
4973 @*/
4974 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4975 {
4976   VecScatter_MPI_General *gen_to,*gen_from;
4977   PetscErrorCode         ierr;
4978   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4979   Mat_SeqAIJ             *b_oth;
4980   VecScatter             ctx=a->Mvctx;
4981   MPI_Comm               comm=((PetscObject)ctx)->comm;
4982   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4983   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4984   PetscScalar            *rvalues,*svalues;
4985   MatScalar              *b_otha,*bufa,*bufA;
4986   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4987   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4988   MPI_Status             *sstatus,rstatus;
4989   PetscMPIInt            jj;
4990   PetscInt               *cols,sbs,rbs;
4991   PetscScalar            *vals;
4992 
4993   PetscFunctionBegin;
4994   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4995     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4996   }
4997   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4998   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4999 
5000   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5001   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5002   rvalues  = gen_from->values; /* holds the length of receiving row */
5003   svalues  = gen_to->values;   /* holds the length of sending row */
5004   nrecvs   = gen_from->n;
5005   nsends   = gen_to->n;
5006 
5007   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5008   srow     = gen_to->indices;   /* local row index to be sent */
5009   sstarts  = gen_to->starts;
5010   sprocs   = gen_to->procs;
5011   sstatus  = gen_to->sstatus;
5012   sbs      = gen_to->bs;
5013   rstarts  = gen_from->starts;
5014   rprocs   = gen_from->procs;
5015   rbs      = gen_from->bs;
5016 
5017   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5018   if (scall == MAT_INITIAL_MATRIX){
5019     /* i-array */
5020     /*---------*/
5021     /*  post receives */
5022     for (i=0; i<nrecvs; i++){
5023       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5024       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5025       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5026     }
5027 
5028     /* pack the outgoing message */
5029     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5030     sstartsj[0] = 0;  rstartsj[0] = 0;
5031     len = 0; /* total length of j or a array to be sent */
5032     k = 0;
5033     for (i=0; i<nsends; i++){
5034       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5035       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5036       for (j=0; j<nrows; j++) {
5037         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5038         for (l=0; l<sbs; l++){
5039           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5040           rowlen[j*sbs+l] = ncols;
5041           len += ncols;
5042           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5043         }
5044         k++;
5045       }
5046       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5047       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5048     }
5049     /* recvs and sends of i-array are completed */
5050     i = nrecvs;
5051     while (i--) {
5052       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5053     }
5054     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5055 
5056     /* allocate buffers for sending j and a arrays */
5057     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5058     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5059 
5060     /* create i-array of B_oth */
5061     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5062     b_othi[0] = 0;
5063     len = 0; /* total length of j or a array to be received */
5064     k = 0;
5065     for (i=0; i<nrecvs; i++){
5066       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5067       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5068       for (j=0; j<nrows; j++) {
5069         b_othi[k+1] = b_othi[k] + rowlen[j];
5070         len += rowlen[j]; k++;
5071       }
5072       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5073     }
5074 
5075     /* allocate space for j and a arrays of B_oth */
5076     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5077     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5078 
5079     /* j-array */
5080     /*---------*/
5081     /*  post receives of j-array */
5082     for (i=0; i<nrecvs; i++){
5083       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5084       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5085     }
5086 
5087     /* pack the outgoing message j-array */
5088     k = 0;
5089     for (i=0; i<nsends; i++){
5090       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5091       bufJ = bufj+sstartsj[i];
5092       for (j=0; j<nrows; j++) {
5093         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5094         for (ll=0; ll<sbs; ll++){
5095           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5096           for (l=0; l<ncols; l++){
5097             *bufJ++ = cols[l];
5098           }
5099           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5100         }
5101       }
5102       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5103     }
5104 
5105     /* recvs and sends of j-array are completed */
5106     i = nrecvs;
5107     while (i--) {
5108       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5109     }
5110     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5111   } else if (scall == MAT_REUSE_MATRIX){
5112     sstartsj = *startsj;
5113     rstartsj = *startsj_r;
5114     bufa     = *bufa_ptr;
5115     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5116     b_otha   = b_oth->a;
5117   } else {
5118     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5119   }
5120 
5121   /* a-array */
5122   /*---------*/
5123   /*  post receives of a-array */
5124   for (i=0; i<nrecvs; i++){
5125     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5126     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5127   }
5128 
5129   /* pack the outgoing message a-array */
5130   k = 0;
5131   for (i=0; i<nsends; i++){
5132     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5133     bufA = bufa+sstartsj[i];
5134     for (j=0; j<nrows; j++) {
5135       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5136       for (ll=0; ll<sbs; ll++){
5137         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5138         for (l=0; l<ncols; l++){
5139           *bufA++ = vals[l];
5140         }
5141         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5142       }
5143     }
5144     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5145   }
5146   /* recvs and sends of a-array are completed */
5147   i = nrecvs;
5148   while (i--) {
5149     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5150   }
5151   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5152   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5153 
5154   if (scall == MAT_INITIAL_MATRIX){
5155     /* put together the new matrix */
5156     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5157 
5158     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5159     /* Since these are PETSc arrays, change flags to free them as necessary. */
5160     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5161     b_oth->free_a  = PETSC_TRUE;
5162     b_oth->free_ij = PETSC_TRUE;
5163     b_oth->nonew   = 0;
5164 
5165     ierr = PetscFree(bufj);CHKERRQ(ierr);
5166     if (!startsj || !bufa_ptr){
5167       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5168       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5169     } else {
5170       *startsj   = sstartsj;
5171       *startsj_r = rstartsj;
5172       *bufa_ptr  = bufa;
5173     }
5174   }
5175   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5176   PetscFunctionReturn(0);
5177 }
5178 
5179 #undef __FUNCT__
5180 #define __FUNCT__ "MatGetCommunicationStructs"
5181 /*@C
5182   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5183 
5184   Not Collective
5185 
5186   Input Parameters:
5187 . A - The matrix in mpiaij format
5188 
5189   Output Parameters:
5190 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5191 . colmap - A map from global column index to local index into lvec
5192 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5193 
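  A minimal access sketch (the returned objects are internal to the matrix
  and must not be destroyed by the caller):
.vb
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    Vec        lvec;
    VecScatter sc;
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sc);CHKERRQ(ierr);
.ve
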
5194   Level: developer
5195 
5196 @*/
5197 #if defined (PETSC_USE_CTABLE)
5198 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5199 #else
5200 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5201 #endif
5202 {
5203   Mat_MPIAIJ *a;
5204 
5205   PetscFunctionBegin;
5206   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5207   PetscValidPointer(lvec, 2);
5208   PetscValidPointer(colmap, 3);
5209   PetscValidPointer(multScatter, 4);
5210   a = (Mat_MPIAIJ *) A->data;
5211   if (lvec) *lvec = a->lvec;
5212   if (colmap) *colmap = a->colmap;
5213   if (multScatter) *multScatter = a->Mvctx;
5214   PetscFunctionReturn(0);
5215 }
5216 
5217 EXTERN_C_BEGIN
5218 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5219 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5220 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5221 EXTERN_C_END
5222 
5223 #undef __FUNCT__
5224 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5225 /*
5226     Computes C = A*B as (B'*A')', since computing A*B directly is untenable
5227 
5228                n                       p                          p
5229         (              )       (              )         (                  )
5230       m (      A       )  *  n (       B      )   =   m (         C        )
5231         (              )       (              )         (                  )
5232 
5233 */
5234 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5235 {
5236   PetscErrorCode     ierr;
5237   Mat                At,Bt,Ct;
5238 
5239   PetscFunctionBegin;
5240   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5241   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
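  /* Ct = B' * A'; the final transpose below yields C = (B'*A')' = A*B */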
5242   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5243   ierr = MatDestroy(At);CHKERRQ(ierr);
5244   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5245   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5246   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5247   PetscFunctionReturn(0);
5248 }
5249 
5250 #undef __FUNCT__
5251 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
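/*
   Symbolic phase: creates, preallocates, and assembles the m x n MPIDENSE
   product matrix C; the values are filled in by
   MatMatMultNumeric_MPIDense_MPIAIJ().
*/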
5252 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5253 {
5254   PetscErrorCode ierr;
5255   PetscInt       m=A->rmap->n,n=B->cmap->n;
5256   Mat            Cmat;
5257 
5258   PetscFunctionBegin;
5259   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5260   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5261   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5262   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5263   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5264   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5265   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5266   *C   = Cmat;
5267   PetscFunctionReturn(0);
5268 }
5269 
5270 /* ----------------------------------------------------------------*/
5271 #undef __FUNCT__
5272 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5273 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5274 {
5275   PetscErrorCode ierr;
5276 
5277   PetscFunctionBegin;
5278   if (scall == MAT_INITIAL_MATRIX){
5279     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5280   }
5281   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5282   PetscFunctionReturn(0);
5283 }
5284 
5285 EXTERN_C_BEGIN
5286 #if defined(PETSC_HAVE_MUMPS)
5287 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5288 #endif
5289 #if defined(PETSC_HAVE_PASTIX)
5290 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5291 #endif
5292 #if defined(PETSC_HAVE_SUPERLU_DIST)
5293 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5294 #endif
5295 #if defined(PETSC_HAVE_SPOOLES)
5296 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5297 #endif
5298 EXTERN_C_END
5299 
5300 /*MC
5301    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5302 
5303    Options Database Keys:
5304 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5305 
5306   Level: beginner
5307 
5308 .seealso: MatCreateMPIAIJ()
5309 M*/
5310 
5311 EXTERN_C_BEGIN
5312 #undef __FUNCT__
5313 #define __FUNCT__ "MatCreate_MPIAIJ"
5314 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5315 {
5316   Mat_MPIAIJ     *b;
5317   PetscErrorCode ierr;
5318   PetscMPIInt    size;
5319 
5320   PetscFunctionBegin;
5321   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5322 
5323   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5324   B->data         = (void*)b;
5325   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5326   B->rmap->bs     = 1;
5327   B->assembled    = PETSC_FALSE;
5328 
5329   B->insertmode   = NOT_SET_VALUES;
5330   b->size         = size;
5331   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5332 
5333   /* build cache for off array entries formed */
5334   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5335   b->donotstash  = PETSC_FALSE;
5336   b->colmap      = 0;
5337   b->garray      = 0;
5338   b->roworiented = PETSC_TRUE;
5339 
5340   /* local vector and scatter context used in the matrix-vector multiply */
5341   b->lvec      = PETSC_NULL;
5342   b->Mvctx     = PETSC_NULL;
5343 
5344   /* work space for MatGetRow() */
5345   b->rowindices   = 0;
5346   b->rowvalues    = 0;
5347   b->getrowactive = PETSC_FALSE;
5348 
5349 #if defined(PETSC_HAVE_SPOOLES)
5350   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5351                                      "MatGetFactor_mpiaij_spooles",
5352                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5353 #endif
5354 #if defined(PETSC_HAVE_MUMPS)
5355   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5356                                      "MatGetFactor_aij_mumps",
5357                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5358 #endif
5359 #if defined(PETSC_HAVE_PASTIX)
5360   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5361                                      "MatGetFactor_mpiaij_pastix",
5362                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5363 #endif
5364 #if defined(PETSC_HAVE_SUPERLU_DIST)
5365   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5366                                      "MatGetFactor_mpiaij_superlu_dist",
5367                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5368 #endif
5369   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5370                                      "MatStoreValues_MPIAIJ",
5371                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5372   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5373                                      "MatRetrieveValues_MPIAIJ",
5374                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5375   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5376                                      "MatGetDiagonalBlock_MPIAIJ",
5377                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5378   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5379                                      "MatIsTranspose_MPIAIJ",
5380                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5381   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5382                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5383                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5384   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5385                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5386                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5387   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5388                                      "MatDiagonalScaleLocal_MPIAIJ",
5389                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5390   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5391                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5392                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5393   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5394                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5395                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5396   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5397                                      "MatConvert_MPIAIJ_MPISBAIJ",
5398                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5399   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5400                                      "MatMatMult_MPIDense_MPIAIJ",
5401                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5402   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5403                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5404                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5405   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5406                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5407                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5408   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5409   PetscFunctionReturn(0);
5410 }
5411 EXTERN_C_END
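
/*
   A minimal sketch (illustrative, not from this file) of how the function
   pointers composed in MatCreate_MPIAIJ() above are retrieved and invoked
   later, shown for the preallocation routine; d_nz, d_nnz, o_nz, and o_nnz
   stand for the caller's preallocation arguments:

     PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) {
       ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
     }
*/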
5412 
5413 #undef __FUNCT__
5414 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5415 /*@
5416      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5417          and "off-diagonal" parts of the matrix in CSR format.
5418 
5419    Collective on MPI_Comm
5420 
5421    Input Parameters:
5422 +  comm - MPI communicator
5423 .  m - number of local rows (Cannot be PETSC_DECIDE)
5424 .  n - number of local columns; this must be the same as the local size used in creating the
5425        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5426        it calculated if N is given). For square matrices n is almost always m.
5427 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5428 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5429 .   i - row indices for "diagonal" portion of matrix
5430 .   j - column indices
5431 .   a - matrix values
5432 .   oi - row indices for "off-diagonal" portion of matrix
5433 .   oj - column indices
5434 -   oa - matrix values
5435 
5436    Output Parameter:
5437 .   mat - the matrix
5438 
5439    Level: advanced
5440 
5441    Notes:
5442        The i, j, and a arrays (and likewise oi, oj, and oa) ARE NOT copied by this routine into the internal format used by PETSc; they must remain valid for the life of the matrix.
5443 
5444        The i, j, oi, and oj indices are 0 based; the oj column indices are global
5445 
5446        See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix
5447 
5448        This sets local rows and cannot be used to set off-processor values.
5449 
5450        You cannot later use MatSetValues() to change values in this matrix.
5451 
5452 .keywords: matrix, aij, compressed row, sparse, parallel
5453 
5454 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5455           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5456 @*/
5457 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5458 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5459 {
5460   PetscErrorCode ierr;
5461   Mat_MPIAIJ     *maij;
5462 
5463   PetscFunctionBegin;
5464   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5465   if (i[0]) {
5466     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5467   }
5468   if (oi[0]) {
5469     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5470   }
5471   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5472   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5473   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5474   maij = (Mat_MPIAIJ*) (*mat)->data;
5475   maij->donotstash     = PETSC_TRUE;
5476   (*mat)->preallocated = PETSC_TRUE;
5477 
5478   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5479   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5480   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5481   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5482 
5483   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5484   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5485 
5486   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5487   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5488   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5489   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5490 
5491   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5492   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5493   PetscFunctionReturn(0);
5494 }
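
/*
   A minimal calling sketch for MatCreateMPIAIJWithSplitArrays() (illustrative,
   not from this file). Shown for the process owning rows 0 and 1 of a 4x4
   matrix whose diagonal block covers columns 0 and 1; the off-diagonal column
   indices in oj are global. All arrays must outlive the matrix since they are
   not copied:

     Mat         A;
     PetscInt    i[]  = {0,1,2};    row offsets of the "diagonal" block
     PetscInt    j[]  = {0,1};      one entry in each of local columns 0 and 1
     PetscScalar a[]  = {1.0,3.0};
     PetscInt    oi[] = {0,1,1};    row 0 has the only "off-diagonal" entry
     PetscInt    oj[] = {3};        global column index
     PetscScalar oa[] = {2.0};
     ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/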
5495 
5496 /*
5497     Special version for direct calls from Fortran
5498 */
5499 #include "private/fortranimpl.h"
5500 
5501 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5502 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5503 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5504 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5505 #endif
5506 
5507 /* Redefine these macros so that they can be used in a function returning void */
5508 #undef CHKERRQ
5509 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5510 #undef SETERRQ2
5511 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5512 #undef SETERRQ
5513 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5514 
5515 EXTERN_C_BEGIN
5516 #undef __FUNCT__
5517 #define __FUNCT__ "matsetvaluesmpiaij_"
5518 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5519 {
5520   Mat             mat = *mmat;
5521   PetscInt        m = *mm, n = *mn;
5522   InsertMode      addv = *maddv;
5523   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5524   PetscScalar     value;
5525   PetscErrorCode  ierr;
5526 
5527   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5528   if (mat->insertmode == NOT_SET_VALUES) {
5529     mat->insertmode = addv;
5530   }
5531 #if defined(PETSC_USE_DEBUG)
5532   else if (mat->insertmode != addv) {
5533     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5534   }
5535 #endif
5536   {
5537   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5538   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5539   PetscBool       roworiented = aij->roworiented;
5540 
5541   /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5542   Mat             A = aij->A;
5543   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5544   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5545   MatScalar       *aa = a->a;
5546   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5547   Mat             B = aij->B;
5548   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5549   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5550   MatScalar       *ba = b->a;
5551 
5552   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5553   PetscInt        nonew = a->nonew;
5554   MatScalar       *ap1,*ap2;
5555 
5556   PetscFunctionBegin;
5557   for (i=0; i<m; i++) {
5558     if (im[i] < 0) continue;
5559 #if defined(PETSC_USE_DEBUG)
5560     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5561 #endif
5562     if (im[i] >= rstart && im[i] < rend) {
5563       row      = im[i] - rstart;
5564       lastcol1 = -1;
5565       rp1      = aj + ai[row];
5566       ap1      = aa + ai[row];
5567       rmax1    = aimax[row];
5568       nrow1    = ailen[row];
5569       low1     = 0;
5570       high1    = nrow1;
5571       lastcol2 = -1;
5572       rp2      = bj + bi[row];
5573       ap2      = ba + bi[row];
5574       rmax2    = bimax[row];
5575       nrow2    = bilen[row];
5576       low2     = 0;
5577       high2    = nrow2;
5578 
5579       for (j=0; j<n; j++) {
5580         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5581         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5582         if (in[j] >= cstart && in[j] < cend){
5583           col = in[j] - cstart;
5584           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5585         } else if (in[j] < 0) continue;
5586 #if defined(PETSC_USE_DEBUG)
5587         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5588 #endif
5589         else {
5590           if (mat->was_assembled) {
5591             if (!aij->colmap) {
5592               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5593             }
5594 #if defined (PETSC_USE_CTABLE)
5595             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5596             col--;
5597 #else
5598             col = aij->colmap[in[j]] - 1;
5599 #endif
5600             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5601               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5602               col =  in[j];
5603               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5604               B = aij->B;
5605               b = (Mat_SeqAIJ*)B->data;
5606               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5607               rp2      = bj + bi[row];
5608               ap2      = ba + bi[row];
5609               rmax2    = bimax[row];
5610               nrow2    = bilen[row];
5611               low2     = 0;
5612               high2    = nrow2;
5613               bm       = aij->B->rmap->n;
5614               ba = b->a;
5615             }
5616           } else col = in[j];
5617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5618         }
5619       }
5620     } else {
5621       if (!aij->donotstash) {
5622         if (roworiented) {
5623           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5624         } else {
5625           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5626         }
5627       }
5628     }
5629   }}
5630   PetscFunctionReturnVoid();
5631 }
5632 EXTERN_C_END
5633 
5634