xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3d472b5489a249cae8c8a56ebcb8e864414a0bfa)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code adapted from
10     MatLoad_MPIAIJ(); there is an unfortunate amount of duplication. Should be a routine for each matrix type.
11 
12     Only for square matrices.
13 */
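/*
   A minimal usage sketch (illustrative only; the variable names and surrounding
   setup are assumptions, not code from this file). Here gmat is a SeqAIJ matrix
   fully assembled on process 0 and m is the number of rows this process is to own:

      Mat dist;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ... gmat gets new numerical values on process 0 ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/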
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal entries in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal entries in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
180 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
181 holds an integer array of length N) but access is fast.
182 */
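/*
   A lookup sketch (mirroring how the map is used later in this file): given a
   global column index gcol that falls in the off-diagonal block, the local
   column index lcol is recovered as

      #if defined (PETSC_USE_CTABLE)
        ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
        lcol--;                                  keys/values are stored shifted by +1 so 0 can mean "not present"
      #else
        lcol = aij->colmap[gcol] - 1;            -1 means "not present"
      #endif

   The names gcol and lcol are illustrative; the actual callers use in[j] and col.
*/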
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
284 
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscTruth     roworiented = aij->roworiented;
311 
312   /* Some Variables required in the macro */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscTruth     ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (!aij->donotstash) {
394         if (roworiented) {
395           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
396         } else {
397           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         }
399       }
400     }
401   }
402   PetscFunctionReturn(0);
403 }
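/*
   Typical calling sequence from user code (a sketch of standard PETSc usage, not
   code from this file; row, ncols, cols and vals are illustrative names):

      ierr = MatSetValues(mat,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ... repeat for the other rows ...
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Rows owned locally are inserted directly into the A (diagonal) or B (off-diagonal)
   block above; rows owned by other processes are stashed and communicated during assembly.
*/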
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatGetValues_MPIAIJ"
407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
408 {
409   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
410   PetscErrorCode ierr;
411   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
412   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
413 
414   PetscFunctionBegin;
415   for (i=0; i<m; i++) {
416     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
417     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
418     if (idxm[i] >= rstart && idxm[i] < rend) {
419       row = idxm[i] - rstart;
420       for (j=0; j<n; j++) {
421         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
422         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
423         if (idxn[j] >= cstart && idxn[j] < cend){
424           col = idxn[j] - cstart;
425           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
426         } else {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
429           }
430 #if defined (PETSC_USE_CTABLE)
431           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
432           col --;
433 #else
434           col = aij->colmap[idxn[j]] - 1;
435 #endif
436           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
437           else {
438             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
439           }
440         }
441       }
442     } else {
443       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
444     }
445   }
446   PetscFunctionReturn(0);
447 }
448 
449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscErrorCode ierr;
457   PetscInt       nstash,reallocs;
458   InsertMode     addv;
459 
460   PetscFunctionBegin;
461   if (aij->donotstash) {
462     PetscFunctionReturn(0);
463   }
464 
465   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
466   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
467   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
468   mat->insertmode = addv; /* in case this processor had no cache */
469 
470   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
471   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
472   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
478 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
479 {
480   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
481   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
482   PetscErrorCode ierr;
483   PetscMPIInt    n;
484   PetscInt       i,j,rstart,ncols,flg;
485   PetscInt       *row,*col;
486   PetscTruth     other_disassembled;
487   PetscScalar    *val;
488   InsertMode     addv = mat->insertmode;
489 
490   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
491   PetscFunctionBegin;
492   if (!aij->donotstash) {
493     while (1) {
494       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
495       if (!flg) break;
496 
497       for (i=0; i<n;) {
498         /* Now identify the consecutive vals belonging to the same row */
499         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
500         if (j < n) ncols = j-i;
501         else       ncols = n-i;
502         /* Now assemble all these values with a single function call */
503         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
504         i = j;
505       }
506     }
507     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
508   }
509   a->compressedrow.use     = PETSC_FALSE;
510   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
511   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
512 
513   /* determine if any process has disassembled; if so we must
514      also disassemble ourselves so that we may reassemble. */
515   /*
516      if the nonzero structure of the submatrix B cannot change then we know that
517      no process disassembled, and we can skip this step
518   */
519   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
520     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
521     if (mat->was_assembled && !other_disassembled) {
522       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523     }
524   }
525   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
526     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
527   }
528   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
529   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
530   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
531   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
532 
533   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
534   aij->rowvalues = 0;
535 
536   /* used by MatAXPY() */
537   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
538   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
539 
540   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
541   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
542   PetscFunctionReturn(0);
543 }
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
547 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
548 {
549   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
550   PetscErrorCode ierr;
551 
552   PetscFunctionBegin;
553   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
554   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 }
557 
558 #undef __FUNCT__
559 #define __FUNCT__ "MatZeroRows_MPIAIJ"
560 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
561 {
562   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
563   PetscErrorCode ierr;
564   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
565   PetscInt       i,*owners = A->rmap->range;
566   PetscInt       *nprocs,j,idx,nsends,row;
567   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
568   PetscInt       *rvalues,count,base,slen,*source;
569   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
570   MPI_Comm       comm = ((PetscObject)A)->comm;
571   MPI_Request    *send_waits,*recv_waits;
572   MPI_Status     recv_status,*send_status;
573 #if defined(PETSC_DEBUG)
574   PetscTruth     found = PETSC_FALSE;
575 #endif
576 
577   PetscFunctionBegin;
578   /*  first count number of contributors to each processor */
579   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
580   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
581   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
582   j = 0;
583   for (i=0; i<N; i++) {
584     if (lastidx > (idx = rows[i])) j = 0;
585     lastidx = idx;
586     for (; j<size; j++) {
587       if (idx >= owners[j] && idx < owners[j+1]) {
588         nprocs[2*j]++;
589         nprocs[2*j+1] = 1;
590         owner[i] = j;
591 #if defined(PETSC_DEBUG)
592         found = PETSC_TRUE;
593 #endif
594         break;
595       }
596     }
597 #if defined(PETSC_DEBUG)
598     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
599     found = PETSC_FALSE;
600 #endif
601   }
602   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
603 
604   /* inform other processors of number of messages and max length*/
605   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
606 
607   /* post receives:   */
608   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
609   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
610   for (i=0; i<nrecvs; i++) {
611     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
612   }
613 
614   /* do sends:
615       1) starts[i] gives the starting index in svalues for stuff going to
616          the ith processor
617   */
618   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
619   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
620   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
621   starts[0] = 0;
622   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
623   for (i=0; i<N; i++) {
624     svalues[starts[owner[i]]++] = rows[i];
625   }
626 
627   starts[0] = 0;
628   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
629   count = 0;
630   for (i=0; i<size; i++) {
631     if (nprocs[2*i+1]) {
632       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
633     }
634   }
635   ierr = PetscFree(starts);CHKERRQ(ierr);
636 
637   base = owners[rank];
638 
639   /*  wait on receives */
640   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
641   count  = nrecvs; slen = 0;
642   while (count) {
643     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
644     /* unpack receives into our local space */
645     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
646     source[imdex]  = recv_status.MPI_SOURCE;
647     lens[imdex]    = n;
648     slen          += n;
649     count--;
650   }
651   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
652 
653   /* move the data into the send scatter */
654   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
655   count = 0;
656   for (i=0; i<nrecvs; i++) {
657     values = rvalues + i*nmax;
658     for (j=0; j<lens[i]; j++) {
659       lrows[count++] = values[j] - base;
660     }
661   }
662   ierr = PetscFree(rvalues);CHKERRQ(ierr);
663   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
664   ierr = PetscFree(owner);CHKERRQ(ierr);
665   ierr = PetscFree(nprocs);CHKERRQ(ierr);
666 
667   /* actually zap the local rows */
668   /*
669         Zero the required rows. If the "diagonal block" of the matrix
670      is square and the user wishes to set the diagonal we use separate
671      code so that MatSetValues() is not called for each diagonal entry, which would
672      repeatedly allocate new memory (many mallocs) and slow things down.
673 
674   */
675   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
676   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
677   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
678     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
679   } else if (diag != 0.0) {
680     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
681     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
682       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
683 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
684     }
685     for (i = 0; i < slen; i++) {
686       row  = lrows[i] + rstart;
687       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
688     }
689     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
690     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
691   } else {
692     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
693   }
694   ierr = PetscFree(lrows);CHKERRQ(ierr);
695 
696   /* wait on sends */
697   if (nsends) {
698     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
699     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
700     ierr = PetscFree(send_status);CHKERRQ(ierr);
701   }
702   ierr = PetscFree(send_waits);CHKERRQ(ierr);
703   ierr = PetscFree(svalues);CHKERRQ(ierr);
704 
705   PetscFunctionReturn(0);
706 }
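/*
   A usage sketch (standard PETSc calling sequence, not code from this file; rows and
   nrows are illustrative names). Each process may pass any global row numbers, owned
   or not, since the routine above forwards each row to its owning process:

      ierr = MatZeroRows(A,nrows,rows,1.0);CHKERRQ(ierr);   zero the rows, put 1.0 on the diagonal
      ierr = MatZeroRows(A,nrows,rows,0.0);CHKERRQ(ierr);   zero the rows, leave the diagonal zero
*/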
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatMult_MPIAIJ"
710 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
711 {
712   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
713   PetscErrorCode ierr;
714   PetscInt       nt;
715 
716   PetscFunctionBegin;
717   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
718   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
719   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
720   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
721   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
722   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
723   PetscFunctionReturn(0);
724 }
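/*
   In formula form (a restatement of the code above, not additional functionality):
   with x_loc the locally owned part of xx and x_ghost the off-process values
   gathered into a->lvec by the scatter,

      yy = a->A * x_loc + a->B * x_ghost

   The scatter is started before the local (diagonal-block) product and finished
   afterwards, so communication overlaps with the local computation.
*/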
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
728 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
729 {
730   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
731   PetscErrorCode ierr;
732 
733   PetscFunctionBegin;
734   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatMultAdd_MPIAIJ"
740 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
741 {
742   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
743   PetscErrorCode ierr;
744 
745   PetscFunctionBegin;
746   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
747   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
748   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
749   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNCT__
754 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
755 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
756 {
757   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
758   PetscErrorCode ierr;
759   PetscTruth     merged;
760 
761   PetscFunctionBegin;
762   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
763   /* do nondiagonal part */
764   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
765   if (!merged) {
766     /* send it on its way */
767     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
768     /* do local part */
769     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
770     /* receive remote parts: note this assumes the values are not actually */
771     /* added into yy until the next line */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   } else {
774     /* do local part */
775     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
776     /* send it on its way */
777     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778     /* values actually were received in the Begin() but we need to call this nop */
779     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
780   }
781   PetscFunctionReturn(0);
782 }
783 
784 EXTERN_C_BEGIN
785 #undef __FUNCT__
786 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
787 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
788 {
789   MPI_Comm       comm;
790   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
791   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
792   IS             Me,Notme;
793   PetscErrorCode ierr;
794   PetscInt       M,N,first,last,*notme,i;
795   PetscMPIInt    size;
796 
797   PetscFunctionBegin;
798 
799   /* Easy test: symmetric diagonal block */
800   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
801   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
802   if (!*f) PetscFunctionReturn(0);
803   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
804   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
805   if (size == 1) PetscFunctionReturn(0);
806 
807   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
808   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
809   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
810   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
811   for (i=0; i<first; i++) notme[i] = i;
812   for (i=last; i<M; i++) notme[i-last+first] = i;
813   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
814   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
815   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
816   Aoff = Aoffs[0];
817   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
818   Boff = Boffs[0];
819   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
820   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
821   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
822   ierr = ISDestroy(Me);CHKERRQ(ierr);
823   ierr = ISDestroy(Notme);CHKERRQ(ierr);
824 
825   PetscFunctionReturn(0);
826 }
827 EXTERN_C_END
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
831 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
832 {
833   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
834   PetscErrorCode ierr;
835 
836   PetscFunctionBegin;
837   /* do nondiagonal part */
838   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
839   /* send it on its way */
840   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
841   /* do local part */
842   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
843   /* receive remote parts */
844   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 /*
849   This only works correctly for square matrices where the subblock A->A is the
850   diagonal block.
851 */
852 #undef __FUNCT__
853 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
854 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
855 {
856   PetscErrorCode ierr;
857   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
858 
859   PetscFunctionBegin;
860   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
861   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
862   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatScale_MPIAIJ"
868 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
869 {
870   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
871   PetscErrorCode ierr;
872 
873   PetscFunctionBegin;
874   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
875   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
876   PetscFunctionReturn(0);
877 }
878 
879 #undef __FUNCT__
880 #define __FUNCT__ "MatDestroy_MPIAIJ"
881 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
882 {
883   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
884   PetscErrorCode ierr;
885 
886   PetscFunctionBegin;
887 #if defined(PETSC_USE_LOG)
888   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
889 #endif
890   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
891   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
892   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
893   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
894 #if defined (PETSC_USE_CTABLE)
895   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
896 #else
897   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
898 #endif
899   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
900   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
901   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
902   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
903   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
904   ierr = PetscFree(aij);CHKERRQ(ierr);
905 
906   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
908   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
909   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
910   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
911   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
912   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
913   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
915   PetscFunctionReturn(0);
916 }
917 
918 #undef __FUNCT__
919 #define __FUNCT__ "MatView_MPIAIJ_Binary"
920 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
921 {
922   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
923   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
924   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
925   PetscErrorCode    ierr;
926   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
927   int               fd;
928   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
929   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
930   PetscScalar       *column_values;
931 
932   PetscFunctionBegin;
933   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
934   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
935   nz   = A->nz + B->nz;
936   if (!rank) {
937     header[0] = MAT_FILE_CLASSID;
938     header[1] = mat->rmap->N;
939     header[2] = mat->cmap->N;
940     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
941     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
942     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
943     /* get largest number of rows any processor has */
944     rlen = mat->rmap->n;
945     range = mat->rmap->range;
946     for (i=1; i<size; i++) {
947       rlen = PetscMax(rlen,range[i+1] - range[i]);
948     }
949   } else {
950     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
951     rlen = mat->rmap->n;
952   }
953 
954   /* load up the local row counts */
955   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
956   for (i=0; i<mat->rmap->n; i++) {
957     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
958   }
959 
960   /* store the row lengths to the file */
961   if (!rank) {
962     MPI_Status status;
963     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
964     for (i=1; i<size; i++) {
965       rlen = range[i+1] - range[i];
966       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
967       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
968     }
969   } else {
970     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
971   }
972   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
973 
974   /* load up the local column indices */
975   nzmax = nz; /* process 0 needs as much scratch space as the largest process needs */
976   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
977   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
978   cnt  = 0;
979   for (i=0; i<mat->rmap->n; i++) {
980     for (j=B->i[i]; j<B->i[i+1]; j++) {
981       if ( (col = garray[B->j[j]]) > cstart) break;
982       column_indices[cnt++] = col;
983     }
984     for (k=A->i[i]; k<A->i[i+1]; k++) {
985       column_indices[cnt++] = A->j[k] + cstart;
986     }
987     for (; j<B->i[i+1]; j++) {
988       column_indices[cnt++] = garray[B->j[j]];
989     }
990   }
991   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
992 
993   /* store the column indices to the file */
994   if (!rank) {
995     MPI_Status status;
996     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
997     for (i=1; i<size; i++) {
998       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
999       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1000       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1001       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1002     }
1003   } else {
1004     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1005     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1006   }
1007   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1008 
1009   /* load up the local column values */
1010   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1011   cnt  = 0;
1012   for (i=0; i<mat->rmap->n; i++) {
1013     for (j=B->i[i]; j<B->i[i+1]; j++) {
1014       if ( garray[B->j[j]] > cstart) break;
1015       column_values[cnt++] = B->a[j];
1016     }
1017     for (k=A->i[i]; k<A->i[i+1]; k++) {
1018       column_values[cnt++] = A->a[k];
1019     }
1020     for (; j<B->i[i+1]; j++) {
1021       column_values[cnt++] = B->a[j];
1022     }
1023   }
1024   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1025 
1026   /* store the column values to the file */
1027   if (!rank) {
1028     MPI_Status status;
1029     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1030     for (i=1; i<size; i++) {
1031       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1032       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1033       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1034       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1035     }
1036   } else {
1037     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1038     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1039   }
1040   ierr = PetscFree(column_values);CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
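/*
   A usage sketch for writing a parallel AIJ matrix in this binary format (standard
   PETSc calls, shown for illustration; the file name is an assumption):

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(((PetscObject)mat)->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(mat,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/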
1043 
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1046 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1047 {
1048   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1049   PetscErrorCode    ierr;
1050   PetscMPIInt       rank = aij->rank,size = aij->size;
1051   PetscTruth        isdraw,iascii,isbinary;
1052   PetscViewer       sviewer;
1053   PetscViewerFormat format;
1054 
1055   PetscFunctionBegin;
1056   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1057   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1058   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1059   if (iascii) {
1060     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1061     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1062       MatInfo    info;
1063       PetscTruth inodes;
1064 
1065       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1066       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1067       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1068       if (!inodes) {
1069         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1070 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1071       } else {
1072         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1073 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1074       }
1075       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1076       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1077       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1078       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1079       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1080       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1081       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1082       PetscFunctionReturn(0);
1083     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1084       PetscInt   inodecount,inodelimit,*inodes;
1085       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1086       if (inodes) {
1087         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1088       } else {
1089         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1090       }
1091       PetscFunctionReturn(0);
1092     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1093       PetscFunctionReturn(0);
1094     }
1095   } else if (isbinary) {
1096     if (size == 1) {
1097       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1098       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1099     } else {
1100       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1101     }
1102     PetscFunctionReturn(0);
1103   } else if (isdraw) {
1104     PetscDraw  draw;
1105     PetscTruth isnull;
1106     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1107     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1108   }
1109 
1110   if (size == 1) {
1111     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1112     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1113   } else {
1114     /* assemble the entire matrix onto first processor. */
1115     Mat         A;
1116     Mat_SeqAIJ  *Aloc;
1117     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1118     MatScalar   *a;
1119 
1120     if (mat->rmap->N > 1024) {
1121       PetscTruth flg = PETSC_FALSE;
1122 
1123       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1124       if (!flg) {
1125         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1126       }
1127     }
1128 
1129     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1130     if (!rank) {
1131       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1132     } else {
1133       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1134     }
1135     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1136     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1137     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1138     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1139 
1140     /* copy over the A part */
1141     Aloc = (Mat_SeqAIJ*)aij->A->data;
1142     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1143     row = mat->rmap->rstart;
1144     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1148     }
1149     aj = Aloc->j;
1150     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1151 
1152     /* copy over the B part */
1153     Aloc = (Mat_SeqAIJ*)aij->B->data;
1154     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1155     row  = mat->rmap->rstart;
1156     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1157     ct   = cols;
1158     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1159     for (i=0; i<m; i++) {
1160       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1161       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1162     }
1163     ierr = PetscFree(ct);CHKERRQ(ierr);
1164     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1165     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1166     /*
1167        Every process has to make this call to draw the matrix since the graphics waits are
1168        synchronized across all processes that share the PetscDraw object
1169     */
1170     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1171     if (!rank) {
1172       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1173       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1174     }
1175     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1176     ierr = MatDestroy(A);CHKERRQ(ierr);
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatView_MPIAIJ"
1183 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1184 {
1185   PetscErrorCode ierr;
1186   PetscTruth     iascii,isdraw,issocket,isbinary;
1187 
1188   PetscFunctionBegin;
1189   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1190   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1191   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1192   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1193   if (iascii || isdraw || isbinary || issocket) {
1194     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1195   } else {
1196     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1197   }
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSOR_MPIAIJ"
1203 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1204 {
1205   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1206   PetscErrorCode ierr;
1207   Vec            bb1 = 0;
1208   PetscTruth     hasop;
1209 
1210   PetscFunctionBegin;
1211   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1212     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1213   }
1214 
1215   if (flag == SOR_APPLY_UPPER) {
1216     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1217     PetscFunctionReturn(0);
1218   }
1219 
1220   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1221     if (flag & SOR_ZERO_INITIAL_GUESS) {
1222       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1223       its--;
1224     }
1225 
1226     while (its--) {
1227       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1228       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229 
1230       /* update rhs: bb1 = bb - B*x */
1231       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1232       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1233 
1234       /* local sweep */
1235       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1236     }
1237   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1238     if (flag & SOR_ZERO_INITIAL_GUESS) {
1239       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1240       its--;
1241     }
1242     while (its--) {
1243       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245 
1246       /* update rhs: bb1 = bb - B*x */
1247       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1248       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1249 
1250       /* local sweep */
1251       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1252     }
1253   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1254     if (flag & SOR_ZERO_INITIAL_GUESS) {
1255       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1256       its--;
1257     }
1258     while (its--) {
1259       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1260       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261 
1262       /* update rhs: bb1 = bb - B*x */
1263       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1264       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1265 
1266       /* local sweep */
1267       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1268     }
1269   }  else if (flag & SOR_EISENSTAT) {
1270     Vec         xx1;
1271 
1272     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1273     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1274 
1275     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1277     if (!mat->diag) {
1278       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1279       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1280     }
1281     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1282     if (hasop) {
1283       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1284     } else {
1285       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1286     }
1287     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1288 
1289     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1290 
1291     /* local sweep */
1292     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1293     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1294     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1295   } else {
1296     SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1297   }
1298 
1299   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatPermute_MPIAIJ"
1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1306 {
1307   MPI_Comm       comm,pcomm;
1308   PetscInt       first,local_size,nrows;
1309   const PetscInt *rows;
1310   PetscMPIInt    size;
1311   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
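  /* Strategy: build a version of the row permutation that lives on the matrix
     communicator, gather it onto every process and invert it, and keep only the local
     slice [first,first+local_size) as the target rows.  The column permutation is
     inverted locally; both index sets are then passed to MatGetSubMatrix_MPIAIJ_Private(),
     which extracts the permuted matrix. */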
1315   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1316   /* make a collective version of 'rowp' */
1317   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1318   if (pcomm==comm) {
1319     crowp = rowp;
1320   } else {
1321     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1322     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1323     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1324     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1325   }
1326   /* collect the global row permutation and invert it */
1327   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1328   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1329   if (pcomm!=comm) {
1330     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1331   }
1332   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1333   /* get the local target indices */
1334   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1335   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1336   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1337   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1338   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1339   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1340   /* the column permutation is simpler to handle:
1341      make a local version of 'colp' and invert it */
1342   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1343   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1344   if (size==1) {
1345     lcolp = colp;
1346   } else {
1347     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1348     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1349     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1350   }
1351   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1352   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1353   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1354   if (size>1) {
1355     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1357   }
1358   /* now we just get the submatrix */
1359   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1360   /* clean up */
1361   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1362   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369 {
1370   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1371   Mat            A = mat->A,B = mat->B;
1372   PetscErrorCode ierr;
1373   PetscReal      isend[5],irecv[5];
1374 
1375   PetscFunctionBegin;
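  /* Accumulate the statistics of the diagonal (A) and off-diagonal (B) blocks into
     isend[], then either report the local totals or reduce them with MPI_MAX or MPI_SUM
     over the matrix communicator, depending on the requested flag. */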
1376   info->block_size     = 1.0;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   }
1404   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1405   info->fill_ratio_needed = 0;
1406   info->factor_mallocs    = 0;
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatSetOption_MPIAIJ"
1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1414 {
1415   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   switch (op) {
1420   case MAT_NEW_NONZERO_LOCATIONS:
1421   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1422   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1423   case MAT_KEEP_NONZERO_PATTERN:
1424   case MAT_NEW_NONZERO_LOCATION_ERR:
1425   case MAT_USE_INODES:
1426   case MAT_IGNORE_ZERO_ENTRIES:
1427     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1428     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1429     break;
1430   case MAT_ROW_ORIENTED:
1431     a->roworiented = flg;
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1434     break;
1435   case MAT_NEW_DIAGONALS:
1436     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1437     break;
1438   case MAT_IGNORE_OFF_PROC_ENTRIES:
1439     a->donotstash = PETSC_TRUE;
1440     break;
1441   case MAT_SYMMETRIC:
1442     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1443     break;
1444   case MAT_STRUCTURALLY_SYMMETRIC:
1445     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1446     break;
1447   case MAT_HERMITIAN:
1448     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1449     break;
1450   case MAT_SYMMETRY_ETERNAL:
1451     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1452     break;
1453   default:
1454     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatGetRow_MPIAIJ"
1461 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1462 {
1463   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1464   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1465   PetscErrorCode ierr;
1466   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1467   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1468   PetscInt       *cmap,*idx_p;
1469 
1470   PetscFunctionBegin;
1471   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1472   mat->getrowactive = PETSC_TRUE;
1473 
1474   if (!mat->rowvalues && (idx || v)) {
1475     /*
1476         allocate enough space to hold information from the longest row.
1477     */
1478     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1479     PetscInt   max = 1,tmp;
1480     for (i=0; i<matin->rmap->n; i++) {
1481       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1482       if (max < tmp) { max = tmp; }
1483     }
1484     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1485   }
1486 
1487   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1488   lrow = row - rstart;
1489 
1490   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1491   if (!v)   {pvA = 0; pvB = 0;}
1492   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1493   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1494   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1495   nztot = nzA + nzB;
1496 
1497   cmap  = mat->garray;
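  /* Merge the two local rows into a single row with increasing global column numbers:
     garray maps local columns of B to global columns, so B entries with global column
     < cstart come first, then all A entries shifted by cstart, then the remaining
     B entries. */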
1498   if (v  || idx) {
1499     if (nztot) {
1500       /* Sort by increasing column numbers, assuming A and B already sorted */
1501       PetscInt imark = -1;
1502       if (v) {
1503         *v = v_p = mat->rowvalues;
1504         for (i=0; i<nzB; i++) {
1505           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1506           else break;
1507         }
1508         imark = i;
1509         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1510         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1511       }
1512       if (idx) {
1513         *idx = idx_p = mat->rowindices;
1514         if (imark > -1) {
1515           for (i=0; i<imark; i++) {
1516             idx_p[i] = cmap[cworkB[i]];
1517           }
1518         } else {
1519           for (i=0; i<nzB; i++) {
1520             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1521             else break;
1522           }
1523           imark = i;
1524         }
1525         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1526         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1527       }
1528     } else {
1529       if (idx) *idx = 0;
1530       if (v)   *v   = 0;
1531     }
1532   }
1533   *nz = nztot;
1534   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1535   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1541 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1542 {
1543   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1544 
1545   PetscFunctionBegin;
1546   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1547   aij->getrowactive = PETSC_FALSE;
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatNorm_MPIAIJ"
1553 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1554 {
1555   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1556   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1557   PetscErrorCode ierr;
1558   PetscInt       i,j,cstart = mat->cmap->rstart;
1559   PetscReal      sum = 0.0;
1560   MatScalar      *v;
1561 
1562   PetscFunctionBegin;
1563   if (aij->size == 1) {
1564     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1565   } else {
1566     if (type == NORM_FROBENIUS) {
1567       v = amat->a;
1568       for (i=0; i<amat->nz; i++) {
1569 #if defined(PETSC_USE_COMPLEX)
1570         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1571 #else
1572         sum += (*v)*(*v); v++;
1573 #endif
1574       }
1575       v = bmat->a;
1576       for (i=0; i<bmat->nz; i++) {
1577 #if defined(PETSC_USE_COMPLEX)
1578         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1579 #else
1580         sum += (*v)*(*v); v++;
1581 #endif
1582       }
1583       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1584       *norm = sqrt(*norm);
1585     } else if (type == NORM_1) { /* max column norm */
1586       PetscReal *tmp,*tmp2;
1587       PetscInt  *jj,*garray = aij->garray;
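      /* accumulate |a_ij| into a dense array of global column sums from both blocks,
         sum the arrays across all processes, and take the largest entry */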
1588       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1589       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1590       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1591       *norm = 0.0;
1592       v = amat->a; jj = amat->j;
1593       for (j=0; j<amat->nz; j++) {
1594         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1595       }
1596       v = bmat->a; jj = bmat->j;
1597       for (j=0; j<bmat->nz; j++) {
1598         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1599       }
1600       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1601       for (j=0; j<mat->cmap->N; j++) {
1602         if (tmp2[j] > *norm) *norm = tmp2[j];
1603       }
1604       ierr = PetscFree(tmp);CHKERRQ(ierr);
1605       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1606     } else if (type == NORM_INFINITY) { /* max row norm */
1607       PetscReal ntemp = 0.0;
1608       for (j=0; j<aij->A->rmap->n; j++) {
1609         v = amat->a + amat->i[j];
1610         sum = 0.0;
1611         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1612           sum += PetscAbsScalar(*v); v++;
1613         }
1614         v = bmat->a + bmat->i[j];
1615         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1616           sum += PetscAbsScalar(*v); v++;
1617         }
1618         if (sum > ntemp) ntemp = sum;
1619       }
1620       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1621     } else {
1622       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1623     }
1624   }
1625   PetscFunctionReturn(0);
1626 }
1627 
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatTranspose_MPIAIJ"
1630 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1631 {
1632   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1633   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1634   PetscErrorCode ierr;
1635   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1636   PetscInt       cstart=A->cmap->rstart,ncol;
1637   Mat            B;
1638   MatScalar      *array;
1639 
1640   PetscFunctionBegin;
1641   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1642 
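  /* The transpose is formed by inserting each local row of A (with its column indices
     temporarily shifted to global numbering) and of B (columns mapped through garray)
     as a column of the new matrix via MatSetValues(), then assembling the result. */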
1643   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1644   ai = Aloc->i; aj = Aloc->j;
1645   bi = Bloc->i; bj = Bloc->j;
1646   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1647     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1648     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1649     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1650     for (i=0; i<ai[ma]; i++){
1651       d_nnz[aj[i]] ++;
1652       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1653     }
1654 
1655     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1656     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1657     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1658     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1659     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1660   } else {
1661     B = *matout;
1662   }
1663 
1664   /* copy over the A part */
1665   array = Aloc->a;
1666   row = A->rmap->rstart;
1667   for (i=0; i<ma; i++) {
1668     ncol = ai[i+1]-ai[i];
1669     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1670     row++; array += ncol; aj += ncol;
1671   }
1672   aj = Aloc->j;
1673   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1674 
1675   /* copy over the B part */
1676   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1677   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1678   array = Bloc->a;
1679   row = A->rmap->rstart;
1680   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1681   cols_tmp = cols;
1682   for (i=0; i<mb; i++) {
1683     ncol = bi[i+1]-bi[i];
1684     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1685     row++; array += ncol; cols_tmp += ncol;
1686   }
1687   ierr = PetscFree(cols);CHKERRQ(ierr);
1688 
1689   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1692     *matout = B;
1693   } else {
1694     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1695   }
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1701 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1702 {
1703   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1704   Mat            a = aij->A,b = aij->B;
1705   PetscErrorCode ierr;
1706   PetscInt       s1,s2,s3;
1707 
1708   PetscFunctionBegin;
1709   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1710   if (rr) {
1711     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1712     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1713     /* Overlap communication with computation. */
1714     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1715   }
1716   if (ll) {
1717     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1718     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1719     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1720   }
1721   /* scale the diagonal block */
1722   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1723 
1724   if (rr) {
1725     /* Do a scatter end and then right scale the off-diagonal block */
1726     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1727     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1728   }
1729 
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1735 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1736 {
1737   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1738   PetscErrorCode ierr;
1739 
1740   PetscFunctionBegin;
1741   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1742   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1743   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1744   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1745   PetscFunctionReturn(0);
1746 }
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1749 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1750 {
1751   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1752   PetscErrorCode ierr;
1753 
1754   PetscFunctionBegin;
1755   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatEqual_MPIAIJ"
1761 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1762 {
1763   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1764   Mat            a,b,c,d;
1765   PetscTruth     flg;
1766   PetscErrorCode ierr;
1767 
1768   PetscFunctionBegin;
1769   a = matA->A; b = matA->B;
1770   c = matB->A; d = matB->B;
1771 
1772   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1773   if (flg) {
1774     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1775   }
1776   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatCopy_MPIAIJ"
1782 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1783 {
1784   PetscErrorCode ierr;
1785   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1786   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1787 
1788   PetscFunctionBegin;
1789   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1790   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1791     /* because of the column compression in the off-processor part of the matrix a->B,
1792        the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
1793        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
1794        could be provided by first uncompressing the a->B matrices and then copying the
1795        submatrices */
1796     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1797   } else {
1798     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1799     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1800   }
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1806 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1807 {
1808   PetscErrorCode ierr;
1809 
1810   PetscFunctionBegin;
1811   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatAXPY_MPIAIJ"
1817 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1818 {
1819   PetscErrorCode ierr;
1820   PetscInt       i;
1821   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1822   PetscBLASInt   bnz,one=1;
1823   Mat_SeqAIJ     *x,*y;
1824 
1825   PetscFunctionBegin;
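  /* SAME_NONZERO_PATTERN: the value arrays of the two blocks line up entry for entry,
     so a single BLAS axpy per block suffices.  SUBSET_NONZERO_PATTERN: reuse (or build
     and cache) the xtoy map that sends each nonzero of X's off-diagonal block to its
     slot in Y.  Any other structure falls back to the generic MatAXPY_Basic(). */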
1826   if (str == SAME_NONZERO_PATTERN) {
1827     PetscScalar alpha = a;
1828     x = (Mat_SeqAIJ *)xx->A->data;
1829     y = (Mat_SeqAIJ *)yy->A->data;
1830     bnz = PetscBLASIntCast(x->nz);
1831     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1832     x = (Mat_SeqAIJ *)xx->B->data;
1833     y = (Mat_SeqAIJ *)yy->B->data;
1834     bnz = PetscBLASIntCast(x->nz);
1835     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1836   } else if (str == SUBSET_NONZERO_PATTERN) {
1837     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1838 
1839     x = (Mat_SeqAIJ *)xx->B->data;
1840     y = (Mat_SeqAIJ *)yy->B->data;
1841     if (y->xtoy && y->XtoY != xx->B) {
1842       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1843       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1844     }
1845     if (!y->xtoy) { /* get xtoy */
1846       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1847       y->XtoY = xx->B;
1848       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1849     }
1850     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1851   } else {
1852     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1853   }
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatConjugate_MPIAIJ"
1861 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1862 {
1863 #if defined(PETSC_USE_COMPLEX)
1864   PetscErrorCode ierr;
1865   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1866 
1867   PetscFunctionBegin;
1868   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1869   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1870 #else
1871   PetscFunctionBegin;
1872 #endif
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatRealPart_MPIAIJ"
1878 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1879 {
1880   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1881   PetscErrorCode ierr;
1882 
1883   PetscFunctionBegin;
1884   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1885   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 #undef __FUNCT__
1890 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1891 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1892 {
1893   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1894   PetscErrorCode ierr;
1895 
1896   PetscFunctionBegin;
1897   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1898   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 #ifdef PETSC_HAVE_PBGL
1903 
1904 #include <boost/parallel/mpi/bsp_process_group.hpp>
1905 #include <boost/graph/distributed/ilu_default_graph.hpp>
1906 #include <boost/graph/distributed/ilu_0_block.hpp>
1907 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1908 #include <boost/graph/distributed/petsc/interface.hpp>
1909 #include <boost/multi_array.hpp>
1910 #include <boost/parallel/distributed_property_map.hpp>
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1914 /*
1915   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1916 */
1917 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1918 {
1919   namespace petsc = boost::distributed::petsc;
1920 
1921   namespace graph_dist = boost::graph::distributed;
1922   using boost::graph::distributed::ilu_default::process_group_type;
1923   using boost::graph::ilu_permuted;
1924 
1925   PetscTruth      row_identity, col_identity;
1926   PetscContainer  c;
1927   PetscInt        m, n, M, N;
1928   PetscErrorCode  ierr;
1929 
1930   PetscFunctionBegin;
1931   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1932   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1933   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1934   if (!row_identity || !col_identity) {
1935     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1936   }
1937 
1938   process_group_type pg;
1939   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1940   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1941   lgraph_type&   level_graph = *lgraph_p;
1942   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1943 
1944   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1945   ilu_permuted(level_graph);
1946 
1947   /* put together the new matrix */
1948   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1949   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1950   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1951   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1952   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1953   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1954   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1955 
1956   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1957   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1958   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1964 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1965 {
1966   PetscFunctionBegin;
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatSolve_MPIAIJ"
1972 /*
1973   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1974 */
1975 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1976 {
1977   namespace graph_dist = boost::graph::distributed;
1978 
1979   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1980   lgraph_type*   lgraph_p;
1981   PetscContainer c;
1982   PetscErrorCode ierr;
1983 
1984   PetscFunctionBegin;
1985   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1986   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1987   ierr = VecCopy(b, x);CHKERRQ(ierr);
1988 
1989   PetscScalar* array_x;
1990   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1991   PetscInt sx;
1992   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1993 
1994   PetscScalar* array_b;
1995   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1996   PetscInt sb;
1997   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1998 
1999   lgraph_type&   level_graph = *lgraph_p;
2000   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2001 
2002   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2003   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2004                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2005 
2006   typedef boost::iterator_property_map<array_ref_type::iterator,
2007                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2008   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2009                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2010 
2011   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2012 
2013   PetscFunctionReturn(0);
2014 }
2015 #endif
2016 
2017 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2018   PetscInt       nzlocal,nsends,nrecvs;
2019   PetscMPIInt    *send_rank,*recv_rank;
2020   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2021   PetscScalar    *sbuf_a,**rbuf_a;
2022   PetscErrorCode (*MatDestroy)(Mat);
2023 } Mat_Redundant;
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2027 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2028 {
2029   PetscErrorCode       ierr;
2030   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2031   PetscInt             i;
2032 
2033   PetscFunctionBegin;
2034   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2035   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2036   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2037   for (i=0; i<redund->nrecvs; i++){
2038     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2039     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2040   }
2041   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2042   ierr = PetscFree(redund);CHKERRQ(ierr);
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatDestroy_MatRedundant"
2048 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2049 {
2050   PetscErrorCode  ierr;
2051   PetscContainer  container;
2052   Mat_Redundant   *redund=PETSC_NULL;
2053 
2054   PetscFunctionBegin;
2055   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2056   if (container) {
2057     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2058   } else {
2059     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2060   }
2061   A->ops->destroy = redund->MatDestroy;
2062   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2063   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2064   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2070 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2071 {
2072   PetscMPIInt    rank,size;
2073   MPI_Comm       comm=((PetscObject)mat)->comm;
2074   PetscErrorCode ierr;
2075   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2076   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2077   PetscInt       *rowrange=mat->rmap->range;
2078   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2079   Mat            A=aij->A,B=aij->B,C=*matredundant;
2080   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2081   PetscScalar    *sbuf_a;
2082   PetscInt       nzlocal=a->nz+b->nz;
2083   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2084   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2085   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2086   MatScalar      *aworkA,*aworkB;
2087   PetscScalar    *vals;
2088   PetscMPIInt    tag1,tag2,tag3,imdex;
2089   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2090                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2091   MPI_Status     recv_status,*send_status;
2092   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2093   PetscInt       **rbuf_j=PETSC_NULL;
2094   PetscScalar    **rbuf_a=PETSC_NULL;
2095   Mat_Redundant  *redund=PETSC_NULL;
2096   PetscContainer container;
2097 
2098   PetscFunctionBegin;
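  /* Each process packs its local rows (a row-pointer array followed by global column
     indices in sbuf_j, values in sbuf_a) and exchanges them with the processes holding
     the corresponding rows in the other subcommunicators; every subcommunicator then
     assembles a complete copy of the matrix.  With MAT_REUSE_MATRIX the buffers and
     communication pattern cached in the Mat_Redundant container are reused and only
     the numerical values are resent. */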
2099   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2100   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2101 
2102   if (reuse == MAT_REUSE_MATRIX) {
2103     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2104     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2105     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2106     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2107     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2108     if (container) {
2109       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2110     } else {
2111       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2112     }
2113     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2114 
2115     nsends    = redund->nsends;
2116     nrecvs    = redund->nrecvs;
2117     send_rank = redund->send_rank;
2118     recv_rank = redund->recv_rank;
2119     sbuf_nz   = redund->sbuf_nz;
2120     rbuf_nz   = redund->rbuf_nz;
2121     sbuf_j    = redund->sbuf_j;
2122     sbuf_a    = redund->sbuf_a;
2123     rbuf_j    = redund->rbuf_j;
2124     rbuf_a    = redund->rbuf_a;
2125   }
2126 
2127   if (reuse == MAT_INITIAL_MATRIX){
2128     PetscMPIInt  subrank,subsize;
2129     PetscInt     nleftover,np_subcomm;
2130     /* get the destination processors' id send_rank, nsends and nrecvs */
2131     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2132     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2133     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2134     np_subcomm = size/nsubcomm;
2135     nleftover  = size - nsubcomm*np_subcomm;
2136     nsends = 0; nrecvs = 0;
2137     for (i=0; i<size; i++){ /* i=rank*/
2138       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2139         send_rank[nsends] = i; nsends++;
2140         recv_rank[nrecvs++] = i;
2141       }
2142     }
2143     if (rank >= size - nleftover){/* this proc is a leftover processor */
2144       i = size-nleftover-1;
2145       j = 0;
2146       while (j < nsubcomm - nleftover){
2147         send_rank[nsends++] = i;
2148         i--; j++;
2149       }
2150     }
2151 
2152     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2153       for (i=0; i<nleftover; i++){
2154         recv_rank[nrecvs++] = size-nleftover+i;
2155       }
2156     }
2157 
2158     /* allocate sbuf_j, sbuf_a */
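    /* sbuf_j layout: rptr[0..nrows] (row pointers) followed by the global column
       indices of all nzlocal entries; nzlocal + nrows + 2 leaves one spare slot */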
2159     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2160     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2161     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2162   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2163 
2164   /* copy mat's local entries into the buffers */
2165   if (reuse == MAT_INITIAL_MATRIX){
2166     rownz_max = 0;
2167     rptr = sbuf_j;
2168     cols = sbuf_j + rend-rstart + 1;
2169     vals = sbuf_a;
2170     rptr[0] = 0;
2171     for (i=0; i<rend-rstart; i++){
2172       row = i + rstart;
2173       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2174       ncols  = nzA + nzB;
2175       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2176       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2177       /* load the column indices for this row into cols */
2178       lwrite = 0;
2179       for (l=0; l<nzB; l++) {
2180         if ((ctmp = bmap[cworkB[l]]) < cstart){
2181           vals[lwrite]   = aworkB[l];
2182           cols[lwrite++] = ctmp;
2183         }
2184       }
2185       for (l=0; l<nzA; l++){
2186         vals[lwrite]   = aworkA[l];
2187         cols[lwrite++] = cstart + cworkA[l];
2188       }
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) >= cend){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       vals += ncols;
2196       cols += ncols;
2197       rptr[i+1] = rptr[i] + ncols;
2198       if (rownz_max < ncols) rownz_max = ncols;
2199     }
2200     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2201   } else { /* only copy matrix values into sbuf_a */
2202     rptr = sbuf_j;
2203     vals = sbuf_a;
2204     rptr[0] = 0;
2205     for (i=0; i<rend-rstart; i++){
2206       row = i + rstart;
2207       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2208       ncols  = nzA + nzB;
2209       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2210       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2211       lwrite = 0;
2212       for (l=0; l<nzB; l++) {
2213         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2214       }
2215       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2216       for (l=0; l<nzB; l++) {
2217         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2218       }
2219       vals += ncols;
2220       rptr[i+1] = rptr[i] + ncols;
2221     }
2222   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2223 
2224   /* send nzlocal to others, and recv other's nzlocal */
2225   /*--------------------------------------------------*/
2226   if (reuse == MAT_INITIAL_MATRIX){
2227     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2228     s_waits2 = s_waits3 + nsends;
2229     s_waits1 = s_waits2 + nsends;
2230     r_waits1 = s_waits1 + nsends;
2231     r_waits2 = r_waits1 + nrecvs;
2232     r_waits3 = r_waits2 + nrecvs;
2233   } else {
2234     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2235     r_waits3 = s_waits3 + nsends;
2236   }
2237 
2238   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2239   if (reuse == MAT_INITIAL_MATRIX){
2240     /* get new tags to keep the communication clean */
2241     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2242     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2243     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2244 
2245     /* post receives of other's nzlocal */
2246     for (i=0; i<nrecvs; i++){
2247       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2248     }
2249     /* send nzlocal to others */
2250     for (i=0; i<nsends; i++){
2251       sbuf_nz[i] = nzlocal;
2252       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2253     }
2254     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2255     count = nrecvs;
2256     while (count) {
2257       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2258       recv_rank[imdex] = recv_status.MPI_SOURCE;
2259       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2260       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2261 
2262       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2263       rbuf_nz[imdex] += i + 2;
2264       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2265       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2266       count--;
2267     }
2268     /* wait on sends of nzlocal */
2269     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2270     /* send mat->i,j to others, and recv from other's */
2271     /*------------------------------------------------*/
2272     for (i=0; i<nsends; i++){
2273       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2274       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2275     }
2276     /* wait on receives of mat->i,j */
2277     /*------------------------------*/
2278     count = nrecvs;
2279     while (count) {
2280       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2281       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2282       count--;
2283     }
2284     /* wait on sends of mat->i,j */
2285     /*---------------------------*/
2286     if (nsends) {
2287       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2288     }
2289   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2290 
2291   /* post receives, send and receive mat->a */
2292   /*----------------------------------------*/
2293   for (imdex=0; imdex<nrecvs; imdex++) {
2294     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2295   }
2296   for (i=0; i<nsends; i++){
2297     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2298   }
2299   count = nrecvs;
2300   while (count) {
2301     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2302     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2303     count--;
2304   }
2305   if (nsends) {
2306     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2307   }
2308 
2309   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2310 
2311   /* create redundant matrix */
2312   /*-------------------------*/
2313   if (reuse == MAT_INITIAL_MATRIX){
2314     /* compute rownz_max for preallocation */
2315     for (imdex=0; imdex<nrecvs; imdex++){
2316       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2317       rptr = rbuf_j[imdex];
2318       for (i=0; i<j; i++){
2319         ncols = rptr[i+1] - rptr[i];
2320         if (rownz_max < ncols) rownz_max = ncols;
2321       }
2322     }
2323 
2324     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2325     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2326     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2327     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2328     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2329   } else {
2330     C = *matredundant;
2331   }
2332 
2333   /* insert local matrix entries */
2334   rptr = sbuf_j;
2335   cols = sbuf_j + rend-rstart + 1;
2336   vals = sbuf_a;
2337   for (i=0; i<rend-rstart; i++){
2338     row   = i + rstart;
2339     ncols = rptr[i+1] - rptr[i];
2340     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2341     vals += ncols;
2342     cols += ncols;
2343   }
2344   /* insert received matrix entries */
2345   for (imdex=0; imdex<nrecvs; imdex++){
2346     rstart = rowrange[recv_rank[imdex]];
2347     rend   = rowrange[recv_rank[imdex]+1];
2348     rptr = rbuf_j[imdex];
2349     cols = rbuf_j[imdex] + rend-rstart + 1;
2350     vals = rbuf_a[imdex];
2351     for (i=0; i<rend-rstart; i++){
2352       row   = i + rstart;
2353       ncols = rptr[i+1] - rptr[i];
2354       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2355       vals += ncols;
2356       cols += ncols;
2357     }
2358   }
2359   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2361   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2362   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2363   if (reuse == MAT_INITIAL_MATRIX){
2364     PetscContainer container;
2365     *matredundant = C;
2366     /* create a supporting struct and attach it to C for reuse */
2367     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2368     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2369     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2370     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2371     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2372 
2373     redund->nzlocal = nzlocal;
2374     redund->nsends  = nsends;
2375     redund->nrecvs  = nrecvs;
2376     redund->send_rank = send_rank;
2377     redund->recv_rank = recv_rank;
2378     redund->sbuf_nz = sbuf_nz;
2379     redund->rbuf_nz = rbuf_nz;
2380     redund->sbuf_j  = sbuf_j;
2381     redund->sbuf_a  = sbuf_a;
2382     redund->rbuf_j  = rbuf_j;
2383     redund->rbuf_a  = rbuf_a;
2384 
2385     redund->MatDestroy = C->ops->destroy;
2386     C->ops->destroy    = MatDestroy_MatRedundant;
2387   }
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2393 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2394 {
2395   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2396   PetscErrorCode ierr;
2397   PetscInt       i,*idxb = 0;
2398   PetscScalar    *va,*vb;
2399   Vec            vtmp;
2400 
2401   PetscFunctionBegin;
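  /* Row maxima of the diagonal block go into v, with indices shifted to global column
     numbering; row maxima of the off-diagonal block are computed into vtmp and, where
     larger in absolute value, overwrite v with indices mapped through garray. */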
2402   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2403   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2404   if (idx) {
2405     for (i=0; i<A->rmap->n; i++) {
2406       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2407     }
2408   }
2409 
2410   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2411   if (idx) {
2412     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2413   }
2414   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2415   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2416 
2417   for (i=0; i<A->rmap->n; i++){
2418     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2419       va[i] = vb[i];
2420       if (idx) idx[i] = a->garray[idxb[i]];
2421     }
2422   }
2423 
2424   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2425   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2426   if (idxb) {
2427     ierr = PetscFree(idxb);CHKERRQ(ierr);
2428   }
2429   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 #undef __FUNCT__
2434 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2435 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2436 {
2437   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2438   PetscErrorCode ierr;
2439   PetscInt       i,*idxb = 0;
2440   PetscScalar    *va,*vb;
2441   Vec            vtmp;
2442 
2443   PetscFunctionBegin;
2444   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2445   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2446   if (idx) {
2447     for (i=0; i<A->rmap->n; i++) { /* loop over local rows, as in MatGetRowMaxAbs_MPIAIJ() */
2448       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2449     }
2450   }
2451 
2452   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2453   if (idx) {
2454     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2455   }
2456   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2457   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2458 
2459   for (i=0; i<A->rmap->n; i++){
2460     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2461       va[i] = vb[i];
2462       if (idx) idx[i] = a->garray[idxb[i]];
2463     }
2464   }
2465 
2466   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2467   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2468   if (idxb) {
2469     ierr = PetscFree(idxb);CHKERRQ(ierr);
2470   }
2471   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 #undef __FUNCT__
2476 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2477 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2478 {
2479   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2480   PetscInt       n      = A->rmap->n;
2481   PetscInt       cstart = A->cmap->rstart;
2482   PetscInt      *cmap   = mat->garray;
2483   PetscInt      *diagIdx, *offdiagIdx;
2484   Vec            diagV, offdiagV;
2485   PetscScalar   *a, *diagA, *offdiagA;
2486   PetscInt       r;
2487   PetscErrorCode ierr;
2488 
2489   PetscFunctionBegin;
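  /* Compute the per-row minima of the diagonal and off-diagonal blocks separately, then
     pick for each row whichever entry is smaller in absolute value; diagonal-block
     indices are shifted by cstart, off-diagonal ones are mapped through garray. */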
2490   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2491   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);    /* sequential work vectors for the local blocks */
2492   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2493   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2494   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2495   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2496   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2497   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2498   for(r = 0; r < n; ++r) {
2499     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2500       a[r]   = diagA[r];
2501       idx[r] = cstart + diagIdx[r];
2502     } else {
2503       a[r]   = offdiagA[r];
2504       idx[r] = cmap[offdiagIdx[r]];
2505     }
2506   }
2507   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2508   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2509   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2510   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2511   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2512   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 #undef __FUNCT__
2517 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2518 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2519 {
2520   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2521   PetscInt       n      = A->rmap->n;
2522   PetscInt       cstart = A->cmap->rstart;
2523   PetscInt      *cmap   = mat->garray;
2524   PetscInt      *diagIdx, *offdiagIdx;
2525   Vec            diagV, offdiagV;
2526   PetscScalar   *a, *diagA, *offdiagA;
2527   PetscInt       r;
2528   PetscErrorCode ierr;
2529 
2530   PetscFunctionBegin;
2531   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2531   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);    /* sequential work vectors for the local blocks */
2532   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2534   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2535   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2536   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2537   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2538   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2539   for(r = 0; r < n; ++r) {
2540     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2541       a[r]   = diagA[r];
2542       idx[r] = cstart + diagIdx[r];
2543     } else {
2544       a[r]   = offdiagA[r];
2545       idx[r] = cmap[offdiagIdx[r]];
2546     }
2547   }
2548   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2549   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2550   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2551   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2552   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2553   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2559 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2560 {
2561   PetscErrorCode ierr;
2562   Mat            *dummy;
2563 
2564   PetscFunctionBegin;
2565   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2566   *newmat = *dummy;
2567   ierr = PetscFree(dummy);CHKERRQ(ierr);
2568   PetscFunctionReturn(0);
2569 }
2570 
2571 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2572 /* -------------------------------------------------------------------*/
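/* The entries below fill the slots of struct _MatOps in order; the numbered comments
   give the slot index, and a 0 marks an operation MPIAIJ does not provide. */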
2573 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2574        MatGetRow_MPIAIJ,
2575        MatRestoreRow_MPIAIJ,
2576        MatMult_MPIAIJ,
2577 /* 4*/ MatMultAdd_MPIAIJ,
2578        MatMultTranspose_MPIAIJ,
2579        MatMultTransposeAdd_MPIAIJ,
2580 #ifdef PETSC_HAVE_PBGL
2581        MatSolve_MPIAIJ,
2582 #else
2583        0,
2584 #endif
2585        0,
2586        0,
2587 /*10*/ 0,
2588        0,
2589        0,
2590        MatSOR_MPIAIJ,
2591        MatTranspose_MPIAIJ,
2592 /*15*/ MatGetInfo_MPIAIJ,
2593        MatEqual_MPIAIJ,
2594        MatGetDiagonal_MPIAIJ,
2595        MatDiagonalScale_MPIAIJ,
2596        MatNorm_MPIAIJ,
2597 /*20*/ MatAssemblyBegin_MPIAIJ,
2598        MatAssemblyEnd_MPIAIJ,
2599        MatSetOption_MPIAIJ,
2600        MatZeroEntries_MPIAIJ,
2601 /*24*/ MatZeroRows_MPIAIJ,
2602        0,
2603 #ifdef PETSC_HAVE_PBGL
2604        0,
2605 #else
2606        0,
2607 #endif
2608        0,
2609        0,
2610 /*29*/ MatSetUpPreallocation_MPIAIJ,
2611 #ifdef PETSC_HAVE_PBGL
2612        0,
2613 #else
2614        0,
2615 #endif
2616        0,
2617        0,
2618        0,
2619 /*34*/ MatDuplicate_MPIAIJ,
2620        0,
2621        0,
2622        0,
2623        0,
2624 /*39*/ MatAXPY_MPIAIJ,
2625        MatGetSubMatrices_MPIAIJ,
2626        MatIncreaseOverlap_MPIAIJ,
2627        MatGetValues_MPIAIJ,
2628        MatCopy_MPIAIJ,
2629 /*44*/ MatGetRowMax_MPIAIJ,
2630        MatScale_MPIAIJ,
2631        0,
2632        0,
2633        0,
2634 /*49*/ MatSetBlockSize_MPIAIJ,
2635        0,
2636        0,
2637        0,
2638        0,
2639 /*54*/ MatFDColoringCreate_MPIAIJ,
2640        0,
2641        MatSetUnfactored_MPIAIJ,
2642        MatPermute_MPIAIJ,
2643        0,
2644 /*59*/ MatGetSubMatrix_MPIAIJ,
2645        MatDestroy_MPIAIJ,
2646        MatView_MPIAIJ,
2647        0,
2648        0,
2649 /*64*/ 0,
2650        0,
2651        0,
2652        0,
2653        0,
2654 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2655        MatGetRowMinAbs_MPIAIJ,
2656        0,
2657        MatSetColoring_MPIAIJ,
2658 #if defined(PETSC_HAVE_ADIC)
2659        MatSetValuesAdic_MPIAIJ,
2660 #else
2661        0,
2662 #endif
2663        MatSetValuesAdifor_MPIAIJ,
2664 /*75*/ MatFDColoringApply_AIJ,
2665        0,
2666        0,
2667        0,
2668        0,
2669 /*80*/ 0,
2670        0,
2671        0,
2672 /*83*/ MatLoad_MPIAIJ,
2673        0,
2674        0,
2675        0,
2676        0,
2677        0,
2678 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2679        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2680        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2681        MatPtAP_Basic,
2682        MatPtAPSymbolic_MPIAIJ,
2683 /*94*/ MatPtAPNumeric_MPIAIJ,
2684        0,
2685        0,
2686        0,
2687        0,
2688 /*99*/ 0,
2689        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2690        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2691        MatConjugate_MPIAIJ,
2692        0,
2693 /*104*/MatSetValuesRow_MPIAIJ,
2694        MatRealPart_MPIAIJ,
2695        MatImaginaryPart_MPIAIJ,
2696        0,
2697        0,
2698 /*109*/0,
2699        MatGetRedundantMatrix_MPIAIJ,
2700        MatGetRowMin_MPIAIJ,
2701        0,
2702        0,
2703 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2704        0,
2705        0,
2706        0,
2707        0,
2708 /*119*/0,
2709        0,
2710        0,
2711        0
2712 };
2713 
2714 /* ----------------------------------------------------------------------------------------*/
2715 
2716 EXTERN_C_BEGIN
2717 #undef __FUNCT__
2718 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2719 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2720 {
2721   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2722   PetscErrorCode ierr;
2723 
2724   PetscFunctionBegin;
2725   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2726   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2727   PetscFunctionReturn(0);
2728 }
2729 EXTERN_C_END
2730 
2731 EXTERN_C_BEGIN
2732 #undef __FUNCT__
2733 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2734 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2735 {
2736   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2737   PetscErrorCode ierr;
2738 
2739   PetscFunctionBegin;
2740   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2741   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2742   PetscFunctionReturn(0);
2743 }
2744 EXTERN_C_END
2745 
2746 #include "petscpc.h"
2747 EXTERN_C_BEGIN
2748 #undef __FUNCT__
2749 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2750 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2751 {
2752   Mat_MPIAIJ     *b;
2753   PetscErrorCode ierr;
2754   PetscInt       i;
2755 
2756   PetscFunctionBegin;
2757   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2758   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2759   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2760   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2761 
2762   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2763   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2764   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2765   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2766   if (d_nnz) {
2767     for (i=0; i<B->rmap->n; i++) {
2768       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2769     }
2770   }
2771   if (o_nnz) {
2772     for (i=0; i<B->rmap->n; i++) {
2773       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2774     }
2775   }
2776   b = (Mat_MPIAIJ*)B->data;
2777 
2778   if (!B->preallocated) {
2779     /* Explicitly create 2 MATSEQAIJ matrices. */
2780     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2781     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2782     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2783     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2784     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2785     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2786     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2787     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2788   }
2789 
2790   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2791   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2792   B->preallocated = PETSC_TRUE;
2793   PetscFunctionReturn(0);
2794 }
2795 EXTERN_C_END
2796 
2797 #undef __FUNCT__
2798 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2799 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2800 {
2801   Mat            mat;
2802   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2803   PetscErrorCode ierr;
2804 
2805   PetscFunctionBegin;
2806   *newmat       = 0;
2807   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2808   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2809   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2810   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2811   a    = (Mat_MPIAIJ*)mat->data;
2812 
2813   mat->factortype    = matin->factortype;
2814   mat->rmap->bs      = matin->rmap->bs;
2815   mat->assembled    = PETSC_TRUE;
2816   mat->insertmode   = NOT_SET_VALUES;
2817   mat->preallocated = PETSC_TRUE;
2818 
2819   a->size           = oldmat->size;
2820   a->rank           = oldmat->rank;
2821   a->donotstash     = oldmat->donotstash;
2822   a->roworiented    = oldmat->roworiented;
2823   a->rowindices     = 0;
2824   a->rowvalues      = 0;
2825   a->getrowactive   = PETSC_FALSE;
2826 
2827   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2828   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2829 
2830   if (oldmat->colmap) {
2831 #if defined (PETSC_USE_CTABLE)
2832     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2833 #else
2834     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2835     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2836     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2837 #endif
2838   } else a->colmap = 0;
2839   if (oldmat->garray) {
2840     PetscInt len;
2841     len  = oldmat->B->cmap->n;
2842     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2843     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2844     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2845   } else a->garray = 0;
2846 
2847   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2848   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2849   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2850   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2851   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2852   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2853   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2854   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2855   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2856   *newmat = mat;
2857   PetscFunctionReturn(0);
2858 }
2859 
2860 #undef __FUNCT__
2861 #define __FUNCT__ "MatLoad_MPIAIJ"
2862 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, Mat newMat)
2863 {
2864   PetscScalar    *vals,*svals;
2865   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2866   MPI_Status     status;
2867   PetscErrorCode ierr;
2868   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2869   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2870   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2871   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2872   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2873   int            fd;
2874 
2875   PetscFunctionBegin;
2876   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2877   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2878   if (!rank) {
2879     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2880     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2881     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2882   }
2883 
2884   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2885 
2886   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2887   M = header[1]; N = header[2];
2888   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2889   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2890   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2891 
2892   /* If global sizes are set, check if they are consistent with that given in the file */
2893   if (sizesset) {
2894     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2895   }
2896   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2897   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2898 
2899   /* determine ownership of all rows */
2900   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2901   else m = newMat->rmap->n; /* Set by user */
2902 
2903   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2904   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2905 
2906   /* First process needs enough room for process with most rows */
2907   if (!rank) {
2908     mmax       = rowners[1];
2909     for (i=2; i<=size; i++) { /* include the last process's row count */
2910       mmax = PetscMax(mmax,rowners[i]);
2911     }
2912   } else mmax = m;
2913 
2914   rowners[0] = 0;
2915   for (i=2; i<=size; i++) {
2916     rowners[i] += rowners[i-1];
2917   }
2918   rstart = rowners[rank];
2919   rend   = rowners[rank+1];
2920 
2921   /* distribute row lengths to all processors */
2922   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2923   if (!rank) {
2924     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2925     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); /* must hold the largest per-process block of row lengths */
2926     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2927     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2928     for (j=0; j<m; j++) {
2929       procsnz[0] += ourlens[j];
2930     }
2931     for (i=1; i<size; i++) {
2932       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2933       /* calculate the number of nonzeros on each processor */
2934       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2935         procsnz[i] += rowlengths[j];
2936       }
2937       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2938       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2939     }
2940     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2941   } else {
2942     mpicnt = PetscMPIIntCast(m);
2943     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2944   }
2945 
2946   if (!rank) {
2947     /* determine max buffer needed and allocate it */
2948     maxnz = 0;
2949     for (i=0; i<size; i++) {
2950       maxnz = PetscMax(maxnz,procsnz[i]);
2951     }
2952     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2953 
2954     /* read in my part of the matrix column indices  */
2955     nz   = procsnz[0];
2956     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2957     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2958 
2959     /* read in everyone else's and ship off */
2960     for (i=1; i<size; i++) {
2961       nz     = procsnz[i];
2962       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2963       mpicnt = PetscMPIIntCast(nz);
2964       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2965     }
2966     ierr = PetscFree(cols);CHKERRQ(ierr);
2967   } else {
2968     /* determine buffer space needed for message */
2969     nz = 0;
2970     for (i=0; i<m; i++) {
2971       nz += ourlens[i];
2972     }
2973     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2974 
2975     /* receive message of column indices */
2976     mpicnt = PetscMPIIntCast(nz);
2977     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2978     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2979     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
2980     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
2981     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
2982   }
2983 
2984   /* determine column ownership if matrix is not square */
2985   if (N != M) {
2986     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
2987     else n = newMat->cmap->n;
2988     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2989     cstart = cend - n;
2990   } else {
2991     cstart = rstart;
2992     cend   = rend;
2993     n      = cend - cstart;
2994   }
2995 
2996   /* loop over local rows, determining number of off diagonal entries */
2997   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2998   jj = 0;
2999   for (i=0; i<m; i++) {
3000     for (j=0; j<ourlens[i]; j++) {
3001       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3002       jj++;
3003     }
3004   }
3005 
3006   for (i=0; i<m; i++) {
3007     ourlens[i] -= offlens[i];
3008   }
3009   if (!sizesset) {
3010     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3011   }
3012   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3013 
3014   for (i=0; i<m; i++) {
3015     ourlens[i] += offlens[i];
3016   }
3017 
3018   if (!rank) {
3019     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3020 
3021     /* read in my part of the matrix numerical values  */
3022     nz   = procsnz[0];
3023     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3024 
3025     /* insert into matrix */
3026     jj      = rstart;
3027     smycols = mycols;
3028     svals   = vals;
3029     for (i=0; i<m; i++) {
3030       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3031       smycols += ourlens[i];
3032       svals   += ourlens[i];
3033       jj++;
3034     }
3035 
3036     /* read in other processors and ship out */
3037     for (i=1; i<size; i++) {
3038       nz     = procsnz[i];
3039       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3040       mpicnt = PetscMPIIntCast(nz);
3041       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3042     }
3043     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3044   } else {
3045     /* receive numeric values */
3046     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3047 
3048     /* receive message of values */
3049     mpicnt = PetscMPIIntCast(nz);
3050     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm,&status);CHKERRQ(ierr);
3051     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3052     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
3053     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
3054     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
3055 
3056     /* insert into matrix */
3057     jj      = rstart;
3058     smycols = mycols;
3059     svals   = vals;
3060     for (i=0; i<m; i++) {
3061       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3062       smycols += ourlens[i];
3063       svals   += ourlens[i];
3064       jj++;
3065     }
3066   }
3067   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3068   ierr = PetscFree(vals);CHKERRQ(ierr);
3069   ierr = PetscFree(mycols);CHKERRQ(ierr);
3070   ierr = PetscFree(rowners);CHKERRQ(ierr);
3071 
3072   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3073   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3074   PetscFunctionReturn(0);
3075 }
3076 
3077 #undef __FUNCT__
3078 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3079 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3080 {
3081   PetscErrorCode ierr;
3082   IS             iscol_local;
3083   PetscInt       csize;
3084 
3085   PetscFunctionBegin;
3086   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3087   if (call == MAT_REUSE_MATRIX) {
3088     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3089     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3090   } else {
3091     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3092   }
3093   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3094   if (call == MAT_INITIAL_MATRIX) {
3095     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3096     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3097   }
3098   PetscFunctionReturn(0);
3099 }
3100 
3101 #undef __FUNCT__
3102 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3103 /*
3104     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3105   process, and then the final result assembled by concatenating those local
3106   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3107 
3108   Note: This requires a sequential iscol with all indices.
3109 */
3110 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3111 {
3112   PetscErrorCode ierr;
3113   PetscMPIInt    rank,size;
3114   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3115   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3116   Mat            *local,M,Mreuse;
3117   MatScalar      *vwork,*aa;
3118   MPI_Comm       comm = ((PetscObject)mat)->comm;
3119   Mat_SeqAIJ     *aij;
3120 
3121 
3122   PetscFunctionBegin;
3123   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3124   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3125 
3126   if (call ==  MAT_REUSE_MATRIX) {
3127     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3128     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3129     local = &Mreuse;
3130     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3131   } else {
3132     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3133     Mreuse = *local;
3134     ierr   = PetscFree(local);CHKERRQ(ierr);
3135   }
3136 
3137   /*
3138       m - number of local rows
3139       n - number of columns (same on all processors)
3140       rstart - first row in new global matrix generated
3141   */
3142   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3143   if (call == MAT_INITIAL_MATRIX) {
3144     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3145     ii  = aij->i;
3146     jj  = aij->j;
3147 
3148     /*
3149         Determine the number of non-zeros in the diagonal and off-diagonal
3150         portions of the matrix in order to do correct preallocation
3151     */
3152 
3153     /* first get start and end of "diagonal" columns */
3154     if (csize == PETSC_DECIDE) {
3155       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3156       if (mglobal == n) { /* square matrix */
3157 	nlocal = m;
3158       } else {
3159         nlocal = n/size + ((n % size) > rank);
3160       }
3161     } else {
3162       nlocal = csize;
3163     }
3164     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3165     rstart = rend - nlocal;
3166     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3167 
3168     /* next, compute all the lengths */
3169     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3170     olens = dlens + m;
3171     for (i=0; i<m; i++) {
3172       jend = ii[i+1] - ii[i];
3173       olen = 0;
3174       dlen = 0;
3175       for (j=0; j<jend; j++) {
3176         if (*jj < rstart || *jj >= rend) olen++;
3177         else dlen++;
3178         jj++;
3179       }
3180       olens[i] = olen;
3181       dlens[i] = dlen;
3182     }
3183     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3184     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3185     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3186     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3187     ierr = PetscFree(dlens);CHKERRQ(ierr);
3188   } else {
3189     PetscInt ml,nl;
3190 
3191     M = *newmat;
3192     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3193     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3194     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3195     /*
3196          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3197        rather than the slower MatSetValues().
3198     */
3199     M->was_assembled = PETSC_TRUE;
3200     M->assembled     = PETSC_FALSE;
3201   }
3202   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3203   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3204   ii  = aij->i;
3205   jj  = aij->j;
3206   aa  = aij->a;
3207   for (i=0; i<m; i++) {
3208     row   = rstart + i;
3209     nz    = ii[i+1] - ii[i];
3210     cwork = jj;     jj += nz;
3211     vwork = aa;     aa += nz;
3212     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3213   }
3214 
3215   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3216   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3217   *newmat = M;
3218 
3219   /* save submatrix used in processor for next request */
3220   if (call ==  MAT_INITIAL_MATRIX) {
3221     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3222     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3223   }
3224 
3225   PetscFunctionReturn(0);
3226 }
3227 
3228 EXTERN_C_BEGIN
3229 #undef __FUNCT__
3230 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3231 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3232 {
3233   PetscInt       m,cstart, cend,j,nnz,i,d;
3234   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3235   const PetscInt *JJ;
3236   PetscScalar    *values;
3237   PetscErrorCode ierr;
3238 
3239   PetscFunctionBegin;
3240   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3241 
3242   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3243   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3244   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3245   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3246   m      = B->rmap->n;
3247   cstart = B->cmap->rstart;
3248   cend   = B->cmap->rend;
3249   rstart = B->rmap->rstart;
3250 
3251   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3252 
3253 #if defined(PETSC_USE_DEBUG)
3254   for (i=0; i<m; i++) {
3255     nnz     = Ii[i+1]- Ii[i];
3256     JJ      = J + Ii[i];
3257     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3258     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3259     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3260   }
3261 #endif
3262 
3263   for (i=0; i<m; i++) {
3264     nnz     = Ii[i+1]- Ii[i];
3265     JJ      = J + Ii[i];
3266     nnz_max = PetscMax(nnz_max,nnz);
3267     d       = 0;
3268     for (j=0; j<nnz; j++) {
3269       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3270     }
3271     d_nnz[i] = d;
3272     o_nnz[i] = nnz - d;
3273   }
3274   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3275   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3276 
3277   if (v) values = (PetscScalar*)v;
3278   else {
3279     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3280     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3281   }
3282 
3283   for (i=0; i<m; i++) {
3284     ii   = i + rstart;
3285     nnz  = Ii[i+1]- Ii[i];
3286     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3287   }
3288   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3290 
3291   if (!v) {
3292     ierr = PetscFree(values);CHKERRQ(ierr);
3293   }
3294   PetscFunctionReturn(0);
3295 }
3296 EXTERN_C_END
3297 
3298 #undef __FUNCT__
3299 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3300 /*@
3301    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3302    (the default parallel PETSc format).
3303 
3304    Collective on MPI_Comm
3305 
3306    Input Parameters:
3307 +  B - the matrix
3308 .  i - the indices into j for the start of each local row (starts with zero)
3309 .  j - the column indices for each local row (starts with zero)
3310 -  v - optional values in the matrix
3311 
3312    Level: developer
3313 
3314    Notes:
3315        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3316      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3317      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3318 
3319        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3320 
3321        The format which is used for the sparse matrix input, is equivalent to a
3322     row-major ordering.. i.e for the following matrix, the input data expected is
3323     as shown:
3324 
3325         1 0 0
3326         2 0 3     P0
3327        -------
3328         4 5 6     P1
3329 
3330      Process0 [P0]: rows_owned=[0,1]
3331         i =  {0,1,3}  [size = nrow+1  = 2+1]
3332         j =  {0,0,2}  [size = nz = 3]
3333         v =  {1,2,3}  [size = nz = 3]
3334 
3335      Process1 [P1]: rows_owned=[2]
3336         i =  {0,3}    [size = nrow+1  = 1+1]
3337         j =  {0,1,2}  [size = nz = 3]
3338         v =  {4,5,6}  [size = nz = 3]
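
          A minimal calling sketch for the layout above (an illustrative, hedged example,
          not part of the library source; mlocal is assumed to be 2 on P0 and 1 on P1, and
          i, j, v are the per-process arrays listed):

     .vb
             Mat B;
             MatCreate(PETSC_COMM_WORLD,&B);
             MatSetSizes(B,mlocal,PETSC_DECIDE,3,3);
             MatSetType(B,MATMPIAIJ);
             MatMPIAIJSetPreallocationCSR(B,i,j,v);  /* preallocates, inserts the values, and assembles */
     .ve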
3339 
3340 .keywords: matrix, aij, compressed row, sparse, parallel
3341 
3342 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3343           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3344 @*/
3345 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3346 {
3347   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3348 
3349   PetscFunctionBegin;
3350   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3351   if (f) {
3352     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3353   }
3354   PetscFunctionReturn(0);
3355 }
3356 
3357 #undef __FUNCT__
3358 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3359 /*@C
3360    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3361    (the default parallel PETSc format).  For good matrix assembly performance
3362    the user should preallocate the matrix storage by setting the parameters
3363    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3364    performance can be increased by more than a factor of 50.
3365 
3366    Collective on MPI_Comm
3367 
3368    Input Parameters:
3369 +  A - the matrix
3370 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3371            (same value is used for all local rows)
3372 .  d_nnz - array containing the number of nonzeros in the various rows of the
3373            DIAGONAL portion of the local submatrix (possibly different for each row)
3374            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3375            The size of this array is equal to the number of local rows, i.e 'm'.
3376            You must leave room for the diagonal entry even if it is zero.
3377 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3378            submatrix (same value is used for all local rows).
3379 -  o_nnz - array containing the number of nonzeros in the various rows of the
3380            OFF-DIAGONAL portion of the local submatrix (possibly different for
3381            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3382            structure. The size of this array is equal to the number
3383            of local rows, i.e 'm'.
3384 
3385    If the *_nnz parameter is given then the *_nz parameter is ignored
3386 
3387    The AIJ format (also called the Yale sparse matrix format or
3388    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3389    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3390 
3391    The parallel matrix is partitioned such that the first m0 rows belong to
3392    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3393    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3394 
3395    The DIAGONAL portion of the local submatrix of a processor can be defined
3396    as the submatrix which is obtained by extracting the part corresponding
3397    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3398    first row that belongs to the processor, and r2 is the last row belonging
3399    to this processor. This is a square mxm matrix. The remaining portion
3400    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3401 
3402    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3403 
3404    You can call MatGetInfo() to get information on how effective the preallocation was;
3405    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3406    You can also run with the option -info and look for messages with the string
3407    malloc in them to see if additional memory allocation was needed.
3408 
3409    Example usage:
3410 
3411    Consider the following 8x8 matrix with 34 non-zero values, that is
3412    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3413    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3414    as follows:
3415 
3416 .vb
3417             1  2  0  |  0  3  0  |  0  4
3418     Proc0   0  5  6  |  7  0  0  |  8  0
3419             9  0 10  | 11  0  0  | 12  0
3420     -------------------------------------
3421            13  0 14  | 15 16 17  |  0  0
3422     Proc1   0 18  0  | 19 20 21  |  0  0
3423             0  0  0  | 22 23  0  | 24  0
3424     -------------------------------------
3425     Proc2  25 26 27  |  0  0 28  | 29  0
3426            30  0  0  | 31 32 33  |  0 34
3427 .ve
3428 
3429    This can be represented as a collection of submatrices as:
3430 
3431 .vb
3432       A B C
3433       D E F
3434       G H I
3435 .ve
3436 
3437    Where the submatrices A,B,C are owned by proc0, D,E,F are
3438    owned by proc1, G,H,I are owned by proc2.
3439 
3440    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3441    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3442    The 'M','N' parameters are 8,8, and have the same values on all procs.
3443 
3444    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3445    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3446    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3447    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3448    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3449    matrix, and [DF] as another SeqAIJ matrix.
3450 
3451    When d_nz, o_nz parameters are specified, d_nz storage elements are
3452    allocated for every row of the local diagonal submatrix, and o_nz
3453    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3454    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3455    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3456    In this case, the values of d_nz,o_nz are:
3457 .vb
3458      proc0 : dnz = 2, o_nz = 2
3459      proc1 : dnz = 3, o_nz = 2
3460      proc2 : dnz = 1, o_nz = 4
3461 .ve
3462    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3463    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3464    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3465    34 values.
3466 
3467    When d_nnz, o_nnz parameters are specified, the storage is specified
3468    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3469    In the above case the values for d_nnz,o_nnz are:
3470 .vb
3471      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3472      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3473      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3474 .ve
3475    Here the space allocated is the sum of all the above values, i.e., 34, and
3476    hence the preallocation is perfect.
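
        As a hedged sketch (not part of the library source), the per-row variant above
        might be invoked as follows on each process, where mlocal is 3, 3, or 2 depending
        on the process, and dnnz, onnz are assumed to hold the arrays listed:

     .vb
           Mat A;
           MatCreate(PETSC_COMM_WORLD,&A);
           MatSetSizes(A,mlocal,mlocal,8,8);
           MatSetType(A,MATMPIAIJ);
           MatMPIAIJSetPreallocation(A,0,dnnz,0,onnz);
           /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
     .ve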
3477 
3478    Level: intermediate
3479 
3480 .keywords: matrix, aij, compressed row, sparse, parallel
3481 
3482 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3483           MPIAIJ, MatGetInfo()
3484 @*/
3485 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3486 {
3487   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3488 
3489   PetscFunctionBegin;
3490   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3491   if (f) {
3492     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3493   }
3494   PetscFunctionReturn(0);
3495 }
3496 
3497 #undef __FUNCT__
3498 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3499 /*@
3500      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3501          CSR format the local rows.
3502 
3503    Collective on MPI_Comm
3504 
3505    Input Parameters:
3506 +  comm - MPI communicator
3507 .  m - number of local rows (Cannot be PETSC_DECIDE)
3508 .  n - This value should be the same as the local size used in creating the
3509        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3510        calculated if N is given) For square matrices n is almost always m.
3511 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3512 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3513 .   i - row indices
3514 .   j - column indices
3515 -   a - matrix values
3516 
3517    Output Parameter:
3518 .   mat - the matrix
3519 
3520    Level: intermediate
3521 
3522    Notes:
3523        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3524      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3525      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3526 
3527        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3528 
3529        The format which is used for the sparse matrix input, is equivalent to a
3530     row-major ordering.. i.e for the following matrix, the input data expected is
3531     as shown:
3532 
3533         1 0 0
3534         2 0 3     P0
3535        -------
3536         4 5 6     P1
3537 
3538      Process0 [P0]: rows_owned=[0,1]
3539         i =  {0,1,3}  [size = nrow+1  = 2+1]
3540         j =  {0,0,2}  [size = nz = 3]
3541         v =  {1,2,3}  [size = nz = 3]
3542 
3543      Process1 [P1]: rows_owned=[2]
3544         i =  {0,3}    [size = nrow+1  = 1+1]
3545         j =  {0,1,2}  [size = nz = 3]
3546         v =  {4,5,6}  [size = nz = 3]
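
          For the layout above, a hedged calling sketch (not part of the library source;
          mlocal is assumed to be 2 on P0 and 1 on P1, and i, j, v are the per-process
          arrays listed) might look like:

     .vb
             Mat A;
             MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,mlocal,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
     .ve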
3547 
3548 .keywords: matrix, aij, compressed row, sparse, parallel
3549 
3550 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3551           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3552 @*/
3553 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3554 {
3555   PetscErrorCode ierr;
3556 
3557   PetscFunctionBegin;
3558   if (i[0]) {
3559     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3560   }
3561   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3562   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3563   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3564   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3565   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3566   PetscFunctionReturn(0);
3567 }
3568 
3569 #undef __FUNCT__
3570 #define __FUNCT__ "MatCreateMPIAIJ"
3571 /*@C
3572    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3573    (the default parallel PETSc format).  For good matrix assembly performance
3574    the user should preallocate the matrix storage by setting the parameters
3575    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3576    performance can be increased by more than a factor of 50.
3577 
3578    Collective on MPI_Comm
3579 
3580    Input Parameters:
3581 +  comm - MPI communicator
3582 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3583            This value should be the same as the local size used in creating the
3584            y vector for the matrix-vector product y = Ax.
3585 .  n - This value should be the same as the local size used in creating the
3586        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3587        calculated if N is given) For square matrices n is almost always m.
3588 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3589 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3590 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3591            (same value is used for all local rows)
3592 .  d_nnz - array containing the number of nonzeros in the various rows of the
3593            DIAGONAL portion of the local submatrix (possibly different for each row)
3594            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3595            The size of this array is equal to the number of local rows, i.e 'm'.
3596            You must leave room for the diagonal entry even if it is zero.
3597 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3598            submatrix (same value is used for all local rows).
3599 -  o_nnz - array containing the number of nonzeros in the various rows of the
3600            OFF-DIAGONAL portion of the local submatrix (possibly different for
3601            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3602            structure. The size of this array is equal to the number
3603            of local rows, i.e 'm'.
3604 
3605    Output Parameter:
3606 .  A - the matrix
3607 
3608    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3609    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3610    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3611 
3612    Notes:
3613    If the *_nnz parameter is given then the *_nz parameter is ignored
3614 
3615    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3616    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3617    storage requirements for this matrix.
3618 
3619    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3620    processor than it must be used on all processors that share the object for
3621    that argument.
3622 
3623    The user MUST specify either the local or global matrix dimensions
3624    (possibly both).
3625 
3626    The parallel matrix is partitioned across processors such that the
3627    first m0 rows belong to process 0, the next m1 rows belong to
3628    process 1, the next m2 rows belong to process 2 etc.. where
3629    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3630    values corresponding to [m x N] submatrix.
3631 
3632    The columns are logically partitioned with the n0 columns belonging
3633    to 0th partition, the next n1 columns belonging to the next
3634    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
3635 
3636    The DIAGONAL portion of the local submatrix on any given processor
3637    is the submatrix corresponding to the rows and columns m,n
3638    owned by the given processor, i.e., the diagonal matrix on
3639    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3640    etc. The remaining portion of the local submatrix [m x (N-n)]
3641    constitute the OFF-DIAGONAL portion. The example below better
3642    illustrates this concept.
3643 
3644    For a square global matrix we define each processor's diagonal portion
3645    to be its local rows and the corresponding columns (a square submatrix);
3646    each processor's off-diagonal portion encompasses the remainder of the
3647    local matrix (a rectangular submatrix).
3648 
3649    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3650 
3651    When calling this routine with a single process communicator, a matrix of
3652    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3653    type of communicator, use the construction mechanism:
3654      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3655 
3656    By default, this format uses inodes (identical nodes) when possible.
3657    We search for consecutive rows with the same nonzero structure, thereby
3658    reusing matrix information to achieve increased efficiency.
3659 
3660    Options Database Keys:
3661 +  -mat_no_inode  - Do not use inodes
3662 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3663 -  -mat_aij_oneindex - Internally use indexing starting at 1
3664         rather than 0.  Note that when calling MatSetValues(),
3665         the user still MUST index entries starting at 0!
3666 
3667 
3668    Example usage:
3669 
3670    Consider the following 8x8 matrix with 34 non-zero values, that is
3671    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3672    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3673    as follows:
3674 
3675 .vb
3676             1  2  0  |  0  3  0  |  0  4
3677     Proc0   0  5  6  |  7  0  0  |  8  0
3678             9  0 10  | 11  0  0  | 12  0
3679     -------------------------------------
3680            13  0 14  | 15 16 17  |  0  0
3681     Proc1   0 18  0  | 19 20 21  |  0  0
3682             0  0  0  | 22 23  0  | 24  0
3683     -------------------------------------
3684     Proc2  25 26 27  |  0  0 28  | 29  0
3685            30  0  0  | 31 32 33  |  0 34
3686 .ve
3687 
3688    This can be represented as a collection of submatrices as:
3689 
3690 .vb
3691       A B C
3692       D E F
3693       G H I
3694 .ve
3695 
3696    Where the submatrices A,B,C are owned by proc0, D,E,F are
3697    owned by proc1, G,H,I are owned by proc2.
3698 
3699    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3700    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3701    The 'M','N' parameters are 8,8, and have the same values on all procs.
3702 
3703    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3704    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3705    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3706    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3707    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3708    matrix, and [DF] as another SeqAIJ matrix.
3709 
3710    When d_nz, o_nz parameters are specified, d_nz storage elements are
3711    allocated for every row of the local diagonal submatrix, and o_nz
3712    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3713    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3714    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3715    In this case, the values of d_nz,o_nz are:
3716 .vb
3717      proc0 : dnz = 2, o_nz = 2
3718      proc1 : dnz = 3, o_nz = 2
3719      proc2 : dnz = 1, o_nz = 4
3720 .ve
3721    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3722    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3723    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3724    34 values.
3725 
3726    When d_nnz, o_nnz parameters are specified, the storage is specified
3727    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3728    In the above case the values for d_nnz,o_nnz are:
3729 .vb
3730      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3731      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3732      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3733 .ve
3734    Here the space allocated is the sum of all the above values, i.e., 34, and
3735    hence the preallocation is perfect.
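
        A hedged calling sketch for the example above (not part of the library source;
        mlocal is 3, 3, or 2 depending on the process, and dnnz, onnz are assumed to hold
        the arrays listed):

     .vb
           Mat A;
           MatCreateMPIAIJ(PETSC_COMM_WORLD,mlocal,mlocal,8,8,0,dnnz,0,onnz,&A);
           /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
     .ve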
3736 
3737    Level: intermediate
3738 
3739 .keywords: matrix, aij, compressed row, sparse, parallel
3740 
3741 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3742           MPIAIJ, MatCreateMPIAIJWithArrays()
3743 @*/
3744 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3745 {
3746   PetscErrorCode ierr;
3747   PetscMPIInt    size;
3748 
3749   PetscFunctionBegin;
3750   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3751   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3752   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3753   if (size > 1) {
3754     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3755     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3756   } else {
3757     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3758     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3759   }
3760   PetscFunctionReturn(0);
3761 }
3762 
3763 #undef __FUNCT__
3764 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3765 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3766 {
3767   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3768 
3769   PetscFunctionBegin;
3770   *Ad     = a->A;
3771   *Ao     = a->B;
3772   *colmap = a->garray;
3773   PetscFunctionReturn(0);
3774 }
3775 
3776 #undef __FUNCT__
3777 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3778 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3779 {
3780   PetscErrorCode ierr;
3781   PetscInt       i;
3782   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3783 
3784   PetscFunctionBegin;
3785   if (coloring->ctype == IS_COLORING_GLOBAL) {
3786     ISColoringValue *allcolors,*colors;
3787     ISColoring      ocoloring;
3788 
3789     /* set coloring for diagonal portion */
3790     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3791 
3792     /* set coloring for off-diagonal portion */
3793     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3794     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3795     for (i=0; i<a->B->cmap->n; i++) {
3796       colors[i] = allcolors[a->garray[i]];
3797     }
3798     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3799     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3800     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3801     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3802   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3803     ISColoringValue *colors;
3804     PetscInt        *larray;
3805     ISColoring      ocoloring;
3806 
3807     /* set coloring for diagonal portion */
3808     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3809     for (i=0; i<a->A->cmap->n; i++) {
3810       larray[i] = i + A->cmap->rstart;
3811     }
3812     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3813     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3814     for (i=0; i<a->A->cmap->n; i++) {
3815       colors[i] = coloring->colors[larray[i]];
3816     }
3817     ierr = PetscFree(larray);CHKERRQ(ierr);
3818     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3819     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3820     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3821 
3822     /* set coloring for off-diagonal portion */
3823     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3824     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3825     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3826     for (i=0; i<a->B->cmap->n; i++) {
3827       colors[i] = coloring->colors[larray[i]];
3828     }
3829     ierr = PetscFree(larray);CHKERRQ(ierr);
3830     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3831     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3832     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3833   } else {
3834     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3835   }
3836 
3837   PetscFunctionReturn(0);
3838 }
3839 
3840 #if defined(PETSC_HAVE_ADIC)
3841 #undef __FUNCT__
3842 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3843 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3844 {
3845   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3846   PetscErrorCode ierr;
3847 
3848   PetscFunctionBegin;
3849   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3850   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3851   PetscFunctionReturn(0);
3852 }
3853 #endif
3854 
3855 #undef __FUNCT__
3856 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3857 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3858 {
3859   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3860   PetscErrorCode ierr;
3861 
3862   PetscFunctionBegin;
3863   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3864   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3865   PetscFunctionReturn(0);
3866 }
3867 
3868 #undef __FUNCT__
3869 #define __FUNCT__ "MatMerge"
3870 /*@
3871       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3872                  matrices from each processor
3873 
3874     Collective on MPI_Comm
3875 
3876    Input Parameters:
3877 +    comm - the communicator the parallel matrix will live on
3878 .    inmat - the input sequential matrix on each process
3879 .    n - number of local columns (or PETSC_DECIDE)
3880 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3881 
3882    Output Parameter:
3883 .    outmat - the parallel matrix generated
3884 
3885     Level: advanced
3886 
3887    Notes: The number of columns of the matrix in EACH processor MUST be the same.
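
        A hedged sketch of the intended calling pattern (localmat is an illustrative name,
        not part of the API); note from the code below that the input matrix is destroyed
        by this routine:

     .vb
           Mat localmat,C;
           /* create and assemble localmat as a SEQAIJ matrix on PETSC_COMM_SELF */
           MatMerge(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     .ve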
3888 
3889 @*/
3890 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3891 {
3892   PetscErrorCode ierr;
3893   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3894   PetscInt       *indx;
3895   PetscScalar    *values;
3896 
3897   PetscFunctionBegin;
3898   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3899   if (scall == MAT_INITIAL_MATRIX){
3900     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3901     if (n == PETSC_DECIDE){
3902       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3903     }
3904     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3905     rstart -= m;
3906 
3907     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3908     for (i=0;i<m;i++) {
3909       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3910       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3911       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3912     }
3913     /* This routine will ONLY return MPIAIJ type matrix */
3914     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3915     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3916     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3917     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3918     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3919 
3920   } else if (scall == MAT_REUSE_MATRIX){
3921     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3922   } else {
3923     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3924   }
3925 
3926   for (i=0;i<m;i++) {
3927     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3928     Ii    = i + rstart;
3929     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3930     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3931   }
3932   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3933   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3934   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3935 
3936   PetscFunctionReturn(0);
3937 }
3938 
3939 #undef __FUNCT__
3940 #define __FUNCT__ "MatFileSplit"
3941 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3942 {
3943   PetscErrorCode    ierr;
3944   PetscMPIInt       rank;
3945   PetscInt          m,N,i,rstart,nnz;
3946   size_t            len;
3947   const PetscInt    *indx;
3948   PetscViewer       out;
3949   char              *name;
3950   Mat               B;
3951   const PetscScalar *values;
3952 
3953   PetscFunctionBegin;
3954   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3955   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3956   /* Should this be the type of the diagonal block of A? */
3957   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3958   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3959   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3960   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3961   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3962   for (i=0;i<m;i++) {
3963     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3964     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3965     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3966   }
3967   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3968   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3969 
3970   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3971   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3972   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for ".<rank>" for any 32-bit rank */
3973   sprintf(name,"%s.%d",outfile,rank);
3974   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3975   ierr = PetscFree(name);CHKERRQ(ierr);
3976   ierr = MatView(B,out);CHKERRQ(ierr);
3977   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3978   ierr = MatDestroy(B);CHKERRQ(ierr);
3979   PetscFunctionReturn(0);
3980 }
3981 
3982 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3983 #undef __FUNCT__
3984 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3985 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3986 {
3987   PetscErrorCode       ierr;
3988   Mat_Merge_SeqsToMPI  *merge;
3989   PetscContainer       container;
3990 
3991   PetscFunctionBegin;
3992   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3993   if (container) {
3994     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4003     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4004     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4005     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4006     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4007     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4008 
4009     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4010     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4011   }
4012   ierr = PetscFree(merge);CHKERRQ(ierr);
4013 
4014   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4015   PetscFunctionReturn(0);
4016 }
4017 
4018 #include "../src/mat/utils/freespace.h"
4019 #include "petscbt.h"
4020 
4021 #undef __FUNCT__
4022 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4023 /*@C
4024       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4025                  matrices from each processor
4026 
4027     Collective on MPI_Comm
4028 
4029    Input Parameters:
4030 +    comm - the communicator the parallel matrix will live on
4031 .    seqmat - the input sequential matrix (one per process)
4032 .    m - number of local rows (or PETSC_DECIDE)
4033 .    n - number of local columns (or PETSC_DECIDE)
4034 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4035 
4036    Output Parameter:
4037 .    mpimat - the parallel matrix generated
4038 
4039     Level: advanced
4040 
4041    Notes:
4042      The dimensions of the sequential matrix MUST be the same on each processor.
4043      The input seqmat is included in the container composed on mpimat under the name "MatMergeSeqsToMPI",
4044      and will be destroyed when mpimat is destroyed. Call PetscObjectQuery() to access it.
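
   Example usage (a minimal sketch, not taken from the PETSc examples; it assumes every
   process already holds an assembled SeqAIJ matrix seqmat of identical dimensions):

     Mat seqmat,mpimat;
     /* ... each process builds and assembles its own seqmat ... */
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* when only the numerical values of seqmat change, the symbolic data can be reused */
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);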
4045 @*/
4046 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4047 {
4048   PetscErrorCode       ierr;
4049   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4050   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4051   PetscMPIInt          size,rank,taga,*len_s;
4052   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4053   PetscInt             proc,m;
4054   PetscInt             **buf_ri,**buf_rj;
4055   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4056   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4057   MPI_Request          *s_waits,*r_waits;
4058   MPI_Status           *status;
4059   MatScalar            *aa=a->a;
4060   MatScalar            **abuf_r,*ba_i;
4061   Mat_Merge_SeqsToMPI  *merge;
4062   PetscContainer       container;
4063 
4064   PetscFunctionBegin;
4065   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4066 
4067   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4068   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4069 
4070   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4071   if (container) {
4072     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4073   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()");
4074   bi     = merge->bi;
4075   bj     = merge->bj;
4076   buf_ri = merge->buf_ri;
4077   buf_rj = merge->buf_rj;
4078 
4079   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4080   owners = merge->rowmap->range;
4081   len_s  = merge->len_s;
4082 
4083   /* send and recv matrix values */
4084   /*-----------------------------*/
4085   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4086   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4087 
4088   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4089   for (proc=0,k=0; proc<size; proc++){
4090     if (!len_s[proc]) continue;
4091     i = owners[proc];
4092     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4093     k++;
4094   }
4095 
4096   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4097   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4098   ierr = PetscFree(status);CHKERRQ(ierr);
4099 
4100   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4101   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4102 
4103   /* insert mat values of mpimat */
4104   /*----------------------------*/
4105   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4106   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4107 
4108   for (k=0; k<merge->nrecv; k++){
4109     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4110     nrows = *(buf_ri_k[k]);
4111     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4112     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure  */
4113   }
4114 
4115   /* set values of ba */
4116   m = merge->rowmap->n;
4117   for (i=0; i<m; i++) {
4118     arow = owners[rank] + i;
4119     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4120     bnzi = bi[i+1] - bi[i];
4121     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4122 
4123     /* add local non-zero vals of this proc's seqmat into ba */
4124     anzi = ai[arow+1] - ai[arow];
4125     aj   = a->j + ai[arow];
4126     aa   = a->a + ai[arow];
4127     nextaj = 0;
4128     for (j=0; nextaj<anzi; j++){
4129       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4130         ba_i[j] += aa[nextaj++];
4131       }
4132     }
4133 
4134     /* add received vals into ba */
4135     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4136       /* i-th row */
4137       if (i == *nextrow[k]) {
4138         anzi = *(nextai[k]+1) - *nextai[k];
4139         aj   = buf_rj[k] + *(nextai[k]);
4140         aa   = abuf_r[k] + *(nextai[k]);
4141         nextaj = 0;
4142         for (j=0; nextaj<anzi; j++){
4143           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4144             ba_i[j] += aa[nextaj++];
4145           }
4146         }
4147         nextrow[k]++; nextai[k]++;
4148       }
4149     }
4150     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4151   }
4152   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4153   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4154 
4155   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4156   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4157   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4158   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4159   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4160   PetscFunctionReturn(0);
4161 }
4162 
4163 #undef __FUNCT__
4164 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4165 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4166 {
4167   PetscErrorCode       ierr;
4168   Mat                  B_mpi;
4169   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4170   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4171   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4172   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4173   PetscInt             len,proc,*dnz,*onz;
4174   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4175   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4176   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4177   MPI_Status           *status;
4178   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4179   PetscBT              lnkbt;
4180   Mat_Merge_SeqsToMPI  *merge;
4181   PetscContainer       container;
4182 
4183   PetscFunctionBegin;
4184   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4185 
4186   /* make sure it is a PETSc comm */
4187   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4188   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4189   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4190 
4191   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4192   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4193 
4194   /* determine row ownership */
4195   /*---------------------------------------------------------*/
4196   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4197   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4198   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4199   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4200   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4201   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4202   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4203 
4204   m      = merge->rowmap->n;
4205   M      = merge->rowmap->N;
4206   owners = merge->rowmap->range;
4207 
4208   /* determine the number of messages to send, their lengths */
4209   /*---------------------------------------------------------*/
4210   len_s  = merge->len_s;
4211 
4212   len = 0;  /* length of buf_si[] */
4213   merge->nsend = 0;
4214   for (proc=0; proc<size; proc++){
4215     len_si[proc] = 0;
4216     if (proc == rank){
4217       len_s[proc] = 0;
4218     } else {
4219       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4220       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4221     }
4222     if (len_s[proc]) {
4223       merge->nsend++;
4224       nrows = 0;
4225       for (i=owners[proc]; i<owners[proc+1]; i++){
4226         if (ai[i+1] > ai[i]) nrows++;
4227       }
4228       len_si[proc] = 2*(nrows+1);
4229       len += len_si[proc];
4230     }
4231   }
4232 
4233   /* determine the number and length of messages to receive for ij-structure */
4234   /*-------------------------------------------------------------------------*/
4235   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4236   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4237 
4238   /* post the Irecv of j-structure */
4239   /*-------------------------------*/
4240   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4241   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4242 
4243   /* post the Isend of j-structure */
4244   /*--------------------------------*/
4245   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4246 
4247   for (proc=0, k=0; proc<size; proc++){
4248     if (!len_s[proc]) continue;
4249     i = owners[proc];
4250     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4251     k++;
4252   }
4253 
4254   /* receives and sends of j-structure are complete */
4255   /*------------------------------------------------*/
4256   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4257   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4258 
4259   /* send and recv i-structure */
4260   /*---------------------------*/
4261   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4262   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4263 
4264   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4265   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4266   for (proc=0,k=0; proc<size; proc++){
4267     if (!len_s[proc]) continue;
4268     /* form outgoing message for i-structure:
4269          buf_si[0]:                 nrows to be sent
4270                [1:nrows]:           row index (local to the receiving process)
4271                [nrows+1:2*nrows+1]: i-structure index
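4271 
4271          For example (illustrative only): if the block of seqmat destined for [proc] has three
4271          rows with 2, 0 and 5 nonzeros, then nrows = 2 (only nonempty rows are sent) and
4271          buf_si = [2, 0, 2, 0, 2, 7], giving len_si[proc] = 2*(nrows+1) = 6.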
4272     */
4273     /*-------------------------------------------*/
4274     nrows = len_si[proc]/2 - 1;
4275     buf_si_i    = buf_si + nrows+1;
4276     buf_si[0]   = nrows;
4277     buf_si_i[0] = 0;
4278     nrows = 0;
4279     for (i=owners[proc]; i<owners[proc+1]; i++){
4280       anzi = ai[i+1] - ai[i];
4281       if (anzi) {
4282         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4283         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4284         nrows++;
4285       }
4286     }
4287     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4288     k++;
4289     buf_si += len_si[proc];
4290   }
4291 
4292   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4293   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4294 
4295   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4296   for (i=0; i<merge->nrecv; i++){
4297     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4298   }
4299 
4300   ierr = PetscFree(len_si);CHKERRQ(ierr);
4301   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4302   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4303   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4304   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4305   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4306   ierr = PetscFree(status);CHKERRQ(ierr);
4307 
4308   /* compute a local seq matrix in each processor */
4309   /*----------------------------------------------*/
4310   /* allocate bi array and free space for accumulating nonzero column info */
4311   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4312   bi[0] = 0;
4313 
4314   /* create and initialize a linked list */
4315   nlnk = N+1;
4316   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4317 
4318   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4319   len = 0;
4320   len  = ai[owners[rank+1]] - ai[owners[rank]];
4321   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4322   current_space = free_space;
4323 
4324   /* determine symbolic info for each local row */
4325   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4326 
4327   for (k=0; k<merge->nrecv; k++){
4328     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4329     nrows = *buf_ri_k[k];
4330     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4331     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure  */
4332   }
4333 
4334   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4335   len = 0;
4336   for (i=0;i<m;i++) {
4337     bnzi   = 0;
4338     /* add local non-zero cols of this proc's seqmat into lnk */
4339     arow   = owners[rank] + i;
4340     anzi   = ai[arow+1] - ai[arow];
4341     aj     = a->j + ai[arow];
4342     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4343     bnzi += nlnk;
4344     /* add received col data into lnk */
4345     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4346       if (i == *nextrow[k]) { /* i-th row */
4347         anzi = *(nextai[k]+1) - *nextai[k];
4348         aj   = buf_rj[k] + *nextai[k];
4349         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4350         bnzi += nlnk;
4351         nextrow[k]++; nextai[k]++;
4352       }
4353     }
4354     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4355 
4356     /* if free space is not available, make more free space */
4357     if (current_space->local_remaining<bnzi) {
4358       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4359       nspacedouble++;
4360     }
4361     /* copy data into free space, then initialize lnk */
4362     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4363     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4364 
4365     current_space->array           += bnzi;
4366     current_space->local_used      += bnzi;
4367     current_space->local_remaining -= bnzi;
4368 
4369     bi[i+1] = bi[i] + bnzi;
4370   }
4371 
4372   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4373 
4374   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4375   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4376   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4377 
4378   /* create symbolic parallel matrix B_mpi */
4379   /*---------------------------------------*/
4380   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4381   if (n==PETSC_DECIDE) {
4382     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4383   } else {
4384     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4385   }
4386   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4387   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4388   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4389 
4390   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4391   B_mpi->assembled     = PETSC_FALSE;
4392   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4393   merge->bi            = bi;
4394   merge->bj            = bj;
4395   merge->buf_ri        = buf_ri;
4396   merge->buf_rj        = buf_rj;
4397   merge->coi           = PETSC_NULL;
4398   merge->coj           = PETSC_NULL;
4399   merge->owners_co     = PETSC_NULL;
4400 
4401   /* attach the supporting struct to B_mpi for reuse */
4402   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4403   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4404   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4405   *mpimat = B_mpi;
4406 
4407   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4408   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4409   PetscFunctionReturn(0);
4410 }
4411 
4412 #undef __FUNCT__
4413 #define __FUNCT__ "MatMerge_SeqsToMPI"
4414 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4415 {
4416   PetscErrorCode   ierr;
4417 
4418   PetscFunctionBegin;
4419   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4420   if (scall == MAT_INITIAL_MATRIX){
4421     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4422   }
4423   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4424   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4425   PetscFunctionReturn(0);
4426 }
4427 
4428 #undef __FUNCT__
4429 #define __FUNCT__ "MatGetLocalMat"
4430 /*@
4431      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of the parallel matrix A
4432 
4433     Not Collective
4434 
4435    Input Parameters:
4436 +    A - the matrix
4437 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4438 
4439    Output Parameter:
4440 .    A_loc - the local sequential matrix generated
4441 
4442     Level: developer
4443 
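   Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix):

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     /* if only the numerical values of A change, the existing A_loc can be refilled */
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
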
4444 @*/
4445 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4446 {
4447   PetscErrorCode  ierr;
4448   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4449   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4450   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4451   MatScalar       *aa=a->a,*ba=b->a,*cam;
4452   PetscScalar     *ca;
4453   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4454   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4455 
4456   PetscFunctionBegin;
4457   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4458   if (scall == MAT_INITIAL_MATRIX){
4459     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4460     ci[0] = 0;
4461     for (i=0; i<am; i++){
4462       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4463     }
4464     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4465     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4466     k = 0;
4467     for (i=0; i<am; i++) {
4468       ncols_o = bi[i+1] - bi[i];
4469       ncols_d = ai[i+1] - ai[i];
4470       /* off-diagonal portion of A */
4471       for (jo=0; jo<ncols_o; jo++) {
4472         col = cmap[*bj];
4473         if (col >= cstart) break;
4474         cj[k]   = col; bj++;
4475         ca[k++] = *ba++;
4476       }
4477       /* diagonal portion of A */
4478       for (j=0; j<ncols_d; j++) {
4479         cj[k]   = cstart + *aj++;
4480         ca[k++] = *aa++;
4481       }
4482       /* off-diagonal portion of A */
4483       for (j=jo; j<ncols_o; j++) {
4484         cj[k]   = cmap[*bj++];
4485         ca[k++] = *ba++;
4486       }
4487     }
4488     /* put together the new matrix */
4489     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4490     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4491     /* Since these are PETSc arrays, change flags to free them as necessary. */
4492     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4493     mat->free_a  = PETSC_TRUE;
4494     mat->free_ij = PETSC_TRUE;
4495     mat->nonew   = 0;
4496   } else if (scall == MAT_REUSE_MATRIX){
4497     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4498     ci = mat->i; cj = mat->j; cam = mat->a;
4499     for (i=0; i<am; i++) {
4500       /* off-diagonal portion of A */
4501       ncols_o = bi[i+1] - bi[i];
4502       for (jo=0; jo<ncols_o; jo++) {
4503         col = cmap[*bj];
4504         if (col >= cstart) break;
4505         *cam++ = *ba++; bj++;
4506       }
4507       /* diagonal portion of A */
4508       ncols_d = ai[i+1] - ai[i];
4509       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4510       /* off-diagonal portion of A */
4511       for (j=jo; j<ncols_o; j++) {
4512         *cam++ = *ba++; bj++;
4513       }
4514     }
4515   } else {
4516     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4517   }
4518 
4519   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4520   PetscFunctionReturn(0);
4521 }
4522 
4523 #undef __FUNCT__
4524 #define __FUNCT__ "MatGetLocalMatCondensed"
4525 /*@C
4526      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and only the NON-ZERO columns of A
4527 
4528     Not Collective
4529 
4530    Input Parameters:
4531 +    A - the matrix
4532 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4533 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4534 
4535    Output Parameter:
4536 .    A_loc - the local sequential matrix generated
4537 
4538     Level: developer
4539 
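   Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix, and the
   row and column index sets are chosen by the routine because PETSC_NULL is passed):

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
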
4540 @*/
4541 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4542 {
4543   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4544   PetscErrorCode    ierr;
4545   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4546   IS                isrowa,iscola;
4547   Mat               *aloc;
4548 
4549   PetscFunctionBegin;
4550   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4551   if (!row){
4552     start = A->rmap->rstart; end = A->rmap->rend;
4553     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4554   } else {
4555     isrowa = *row;
4556   }
4557   if (!col){
4558     start = A->cmap->rstart;
4559     cmap  = a->garray;
4560     nzA   = a->A->cmap->n;
4561     nzB   = a->B->cmap->n;
4562     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4563     ncols = 0;
4564     for (i=0; i<nzB; i++) {
4565       if (cmap[i] < start) idx[ncols++] = cmap[i];
4566       else break;
4567     }
4568     imark = i;
4569     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4570     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4571     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4572     ierr = PetscFree(idx);CHKERRQ(ierr);
4573   } else {
4574     iscola = *col;
4575   }
4576   if (scall != MAT_INITIAL_MATRIX){
4577     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4578     aloc[0] = *A_loc;
4579   }
4580   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4581   *A_loc = aloc[0];
4582   ierr = PetscFree(aloc);CHKERRQ(ierr);
4583   if (!row){
4584     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4585   }
4586   if (!col){
4587     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4588   }
4589   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4590   PetscFunctionReturn(0);
4591 }
4592 
4593 #undef __FUNCT__
4594 #define __FUNCT__ "MatGetBrowsOfAcols"
4595 /*@C
4596     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
4597 
4598     Collective on Mat
4599 
4600    Input Parameters:
4601 +    A,B - the matrices in mpiaij format
4602 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4603 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4604 
4605    Output Parameter:
4606 +    rowb, colb - index sets of rows and columns of B to extract
4607 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4608 -    B_seq - the sequential matrix generated
4609 
4610     Level: developer
4611 
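   Example usage (a minimal sketch; A and B are assumed to be assembled MPIAIJ matrices whose
   local dimensions are compatible, as checked by the routine):

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     /* later, after the values (but not the nonzero pattern) of B change: */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
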
4612 @*/
4613 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4614 {
4615   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4616   PetscErrorCode    ierr;
4617   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4618   IS                isrowb,iscolb;
4619   Mat               *bseq;
4620 
4621   PetscFunctionBegin;
4622   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4623     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4624   }
4625   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4626 
4627   if (scall == MAT_INITIAL_MATRIX){
4628     start = A->cmap->rstart;
4629     cmap  = a->garray;
4630     nzA   = a->A->cmap->n;
4631     nzB   = a->B->cmap->n;
4632     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4633     ncols = 0;
4634     for (i=0; i<nzB; i++) {  /* row < local row index */
4635       if (cmap[i] < start) idx[ncols++] = cmap[i];
4636       else break;
4637     }
4638     imark = i;
4639     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4640     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4641     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4642     ierr = PetscFree(idx);CHKERRQ(ierr);
4643     *brstart = imark;
4644     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4645   } else {
4646     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4647     isrowb = *rowb; iscolb = *colb;
4648     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4649     bseq[0] = *B_seq;
4650   }
4651   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4652   *B_seq = bseq[0];
4653   ierr = PetscFree(bseq);CHKERRQ(ierr);
4654   if (!rowb){
4655     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4656   } else {
4657     *rowb = isrowb;
4658   }
4659   if (!colb){
4660     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4661   } else {
4662     *colb = iscolb;
4663   }
4664   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4665   PetscFunctionReturn(0);
4666 }
4667 
4668 #undef __FUNCT__
4669 #define __FUNCT__ "MatGetBrowsOfAoCols"
4670 /*@C
4671     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4672     of the OFF-DIAGONAL portion of the local A
4673 
4674     Collective on Mat
4675 
4676    Input Parameters:
4677 +    A,B - the matrices in mpiaij format
4678 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4679 .    startsj - starting points of B's outgoing j- and a-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4680 .    startsj_r - similar to startsj, but for the incoming (receive) side
4681 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4682 
4683    Output Parameter:
4684 .    B_oth - the sequential matrix generated
4685 
4686     Level: developer
4687 
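   Example usage (a minimal sketch; A and B are assumed to be assembled MPIAIJ matrices whose
   local dimensions are compatible):

     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     /* when only the numerical values of B change, the communication pattern is reused */
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
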
4688 @*/
4689 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4690 {
4691   VecScatter_MPI_General *gen_to,*gen_from;
4692   PetscErrorCode         ierr;
4693   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4694   Mat_SeqAIJ             *b_oth;
4695   VecScatter             ctx=a->Mvctx;
4696   MPI_Comm               comm=((PetscObject)ctx)->comm;
4697   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4698   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4699   PetscScalar            *rvalues,*svalues;
4700   MatScalar              *b_otha,*bufa,*bufA;
4701   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4702   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4703   MPI_Status             *sstatus,rstatus;
4704   PetscMPIInt            jj;
4705   PetscInt               *cols,sbs,rbs;
4706   PetscScalar            *vals;
4707 
4708   PetscFunctionBegin;
4709   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4710     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4711   }
4712   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4713   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4714 
4715   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4716   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4717   rvalues  = gen_from->values; /* holds the length of receiving row */
4718   svalues  = gen_to->values;   /* holds the length of sending row */
4719   nrecvs   = gen_from->n;
4720   nsends   = gen_to->n;
4721 
4722   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4723   srow     = gen_to->indices;   /* local row index to be sent */
4724   sstarts  = gen_to->starts;
4725   sprocs   = gen_to->procs;
4726   sstatus  = gen_to->sstatus;
4727   sbs      = gen_to->bs;
4728   rstarts  = gen_from->starts;
4729   rprocs   = gen_from->procs;
4730   rbs      = gen_from->bs;
4731 
4732   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4733   if (scall == MAT_INITIAL_MATRIX){
4734     /* i-array */
4735     /*---------*/
4736     /*  post receives */
4737     for (i=0; i<nrecvs; i++){
4738       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4739       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4740       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4741     }
4742 
4743     /* pack the outgoing message */
4744     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4745     sstartsj[0] = 0;  rstartsj[0] = 0;
4746     len = 0; /* total length of j or a array to be sent */
4747     k = 0;
4748     for (i=0; i<nsends; i++){
4749       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4750       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4751       for (j=0; j<nrows; j++) {
4752         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4753         for (l=0; l<sbs; l++){
4754           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4755           rowlen[j*sbs+l] = ncols;
4756           len += ncols;
4757           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4758         }
4759         k++;
4760       }
4761       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4762       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4763     }
4764     /* recvs and sends of i-array are completed */
4765     i = nrecvs;
4766     while (i--) {
4767       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4768     }
4769     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4770 
4771     /* allocate buffers for sending j and a arrays */
4772     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4773     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4774 
4775     /* create i-array of B_oth */
4776     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4777     b_othi[0] = 0;
4778     len = 0; /* total length of j or a array to be received */
4779     k = 0;
4780     for (i=0; i<nrecvs; i++){
4781       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4782       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4783       for (j=0; j<nrows; j++) {
4784         b_othi[k+1] = b_othi[k] + rowlen[j];
4785         len += rowlen[j]; k++;
4786       }
4787       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4788     }
4789 
4790     /* allocate space for j and a arrays of B_oth */
4791     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4792     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4793 
4794     /* j-array */
4795     /*---------*/
4796     /*  post receives of j-array */
4797     for (i=0; i<nrecvs; i++){
4798       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4799       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4800     }
4801 
4802     /* pack the outgoing message j-array */
4803     k = 0;
4804     for (i=0; i<nsends; i++){
4805       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4806       bufJ = bufj+sstartsj[i];
4807       for (j=0; j<nrows; j++) {
4808         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4809         for (ll=0; ll<sbs; ll++){
4810           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4811           for (l=0; l<ncols; l++){
4812             *bufJ++ = cols[l];
4813           }
4814           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4815         }
4816       }
4817       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4818     }
4819 
4820     /* recvs and sends of j-array are completed */
4821     i = nrecvs;
4822     while (i--) {
4823       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4824     }
4825     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4826   } else if (scall == MAT_REUSE_MATRIX){
4827     sstartsj = *startsj;
4828     rstartsj = *startsj_r;
4829     bufa     = *bufa_ptr;
4830     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4831     b_otha   = b_oth->a;
4832   } else {
4833     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4834   }
4835 
4836   /* a-array */
4837   /*---------*/
4838   /*  post receives of a-array */
4839   for (i=0; i<nrecvs; i++){
4840     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4841     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4842   }
4843 
4844   /* pack the outgoing message a-array */
4845   k = 0;
4846   for (i=0; i<nsends; i++){
4847     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4848     bufA = bufa+sstartsj[i];
4849     for (j=0; j<nrows; j++) {
4850       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4851       for (ll=0; ll<sbs; ll++){
4852         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4853         for (l=0; l<ncols; l++){
4854           *bufA++ = vals[l];
4855         }
4856         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4857       }
4858     }
4859     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4860   }
4861   /* recvs and sends of a-array are completed */
4862   i = nrecvs;
4863   while (i--) {
4864     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4865   }
4866   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4867   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4868 
4869   if (scall == MAT_INITIAL_MATRIX){
4870     /* put together the new matrix */
4871     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4872 
4873     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4874     /* Since these are PETSc arrays, change flags to free them as necessary. */
4875     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4876     b_oth->free_a  = PETSC_TRUE;
4877     b_oth->free_ij = PETSC_TRUE;
4878     b_oth->nonew   = 0;
4879 
4880     ierr = PetscFree(bufj);CHKERRQ(ierr);
4881     if (!startsj || !bufa_ptr){
4882       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4883       ierr = PetscFree(bufa);CHKERRQ(ierr); /* the send buffer was not requested by the caller, so free it here */
4884     } else {
4885       *startsj   = sstartsj;
4886       *startsj_r = rstartsj;
4887       *bufa_ptr  = bufa;
4888     }
4889   }
4890   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4891   PetscFunctionReturn(0);
4892 }
4893 
4894 #undef __FUNCT__
4895 #define __FUNCT__ "MatGetCommunicationStructs"
4896 /*@C
4897   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4898 
4899   Not Collective
4900 
4901   Input Parameter:
4902 . A - The matrix in mpiaij format
4903 
4904   Output Parameters:
4905 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4906 . colmap - A map from global column index to local index into lvec
4907 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4908 
4909   Level: developer
4910 
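  Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix; the type of
  colmap depends on whether PETSc was configured with PETSC_USE_CTABLE, and the returned
  objects are owned by A and must not be destroyed by the caller):

    Vec        lvec;
    VecScatter Mvctx;
  #if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
  #else
    PetscInt   *colmap;
  #endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
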
4911 @*/
4912 #if defined (PETSC_USE_CTABLE)
4913 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4914 #else
4915 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4916 #endif
4917 {
4918   Mat_MPIAIJ *a;
4919 
4920   PetscFunctionBegin;
4921   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4922   PetscValidPointer(lvec, 2);
4923   PetscValidPointer(colmap, 3);
4924   PetscValidPointer(multScatter, 4);
4925   a = (Mat_MPIAIJ *) A->data;
4926   if (lvec) *lvec = a->lvec;
4927   if (colmap) *colmap = a->colmap;
4928   if (multScatter) *multScatter = a->Mvctx;
4929   PetscFunctionReturn(0);
4930 }
4931 
4932 EXTERN_C_BEGIN
4933 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4934 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4935 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4936 EXTERN_C_END
4937 
4938 #undef __FUNCT__
4939 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4940 /*
4941     Computes C = A*B as (B'*A')' since computing the dense-times-sparse product A*B directly is not supported
4942 
4943                n                       p                          p
4944         (              )       (              )         (                  )
4945       m (      A       )  *  n (       B      )   =   m (         C        )
4946         (              )       (              )         (                  )
4947 
4948 */
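/*
   This is the numeric kernel behind MatMatMult_MPIDense_MPIAIJ() below, which is normally
   reached through MatMatMult(); for example (a sketch, with A an assembled MPIDENSE matrix
   and B an assembled MPIAIJ matrix such that A->cmap->n == B->rmap->n):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/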
4949 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4950 {
4951   PetscErrorCode     ierr;
4952   Mat                At,Bt,Ct;
4953 
4954   PetscFunctionBegin;
4955   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4956   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4957   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4958   ierr = MatDestroy(At);CHKERRQ(ierr);
4959   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4960   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4961   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4962   PetscFunctionReturn(0);
4963 }
4964 
4965 #undef __FUNCT__
4966 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4967 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4968 {
4969   PetscErrorCode ierr;
4970   PetscInt       m=A->rmap->n,n=B->cmap->n;
4971   Mat            Cmat;
4972 
4973   PetscFunctionBegin;
4974   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4975   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4976   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4977   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4978   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4979   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4980   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4981   *C   = Cmat;
4982   PetscFunctionReturn(0);
4983 }
4984 
4985 /* ----------------------------------------------------------------*/
4986 #undef __FUNCT__
4987 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4988 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4989 {
4990   PetscErrorCode ierr;
4991 
4992   PetscFunctionBegin;
4993   if (scall == MAT_INITIAL_MATRIX){
4994     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4995   }
4996   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4997   PetscFunctionReturn(0);
4998 }
4999 
5000 EXTERN_C_BEGIN
5001 #if defined(PETSC_HAVE_MUMPS)
5002 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5003 #endif
5004 #if defined(PETSC_HAVE_PASTIX)
5005 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5006 #endif
5007 #if defined(PETSC_HAVE_SUPERLU_DIST)
5008 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5009 #endif
5010 #if defined(PETSC_HAVE_SPOOLES)
5011 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5012 #endif
5013 EXTERN_C_END
5014 
5015 /*MC
5016    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5017 
5018    Options Database Keys:
5019 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5020 
5021   Level: beginner
5022 
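  Example usage (a minimal sketch of creating an MPIAIJ matrix directly; the local sizes m,n
  and the preallocation counts 5 and 2 are purely illustrative):

    Mat A;
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
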
5023 .seealso: MatCreateMPIAIJ()
5024 M*/
5025 
5026 EXTERN_C_BEGIN
5027 #undef __FUNCT__
5028 #define __FUNCT__ "MatCreate_MPIAIJ"
5029 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5030 {
5031   Mat_MPIAIJ     *b;
5032   PetscErrorCode ierr;
5033   PetscMPIInt    size;
5034 
5035   PetscFunctionBegin;
5036   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5037 
5038   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5039   B->data         = (void*)b;
5040   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5041   B->rmap->bs     = 1;
5042   B->assembled    = PETSC_FALSE;
5043   B->mapping      = 0;
5044 
5045   B->insertmode   = NOT_SET_VALUES;
5046   b->size         = size;
5047   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5048 
5049   /* build cache for off array entries formed */
5050   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5051   b->donotstash  = PETSC_FALSE;
5052   b->colmap      = 0;
5053   b->garray      = 0;
5054   b->roworiented = PETSC_TRUE;
5055 
5056   /* stuff used for matrix vector multiply */
5057   b->lvec      = PETSC_NULL;
5058   b->Mvctx     = PETSC_NULL;
5059 
5060   /* stuff for MatGetRow() */
5061   b->rowindices   = 0;
5062   b->rowvalues    = 0;
5063   b->getrowactive = PETSC_FALSE;
5064 
5065 #if defined(PETSC_HAVE_SPOOLES)
5066   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5067                                      "MatGetFactor_mpiaij_spooles",
5068                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5069 #endif
5070 #if defined(PETSC_HAVE_MUMPS)
5071   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5072                                      "MatGetFactor_aij_mumps",
5073                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5074 #endif
5075 #if defined(PETSC_HAVE_PASTIX)
5076   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5077 					   "MatGetFactor_mpiaij_pastix",
5078 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5079 #endif
5080 #if defined(PETSC_HAVE_SUPERLU_DIST)
5081   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5082                                      "MatGetFactor_mpiaij_superlu_dist",
5083                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5084 #endif
5085   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5086                                      "MatStoreValues_MPIAIJ",
5087                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5088   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5089                                      "MatRetrieveValues_MPIAIJ",
5090                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5091   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5092 				     "MatGetDiagonalBlock_MPIAIJ",
5093                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5094   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5095 				     "MatIsTranspose_MPIAIJ",
5096 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5097   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5098 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5099 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5100   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5101 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5102 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5103   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5104 				     "MatDiagonalScaleLocal_MPIAIJ",
5105 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5106   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5107                                      "MatConvert_MPIAIJ_MPICSRPERM",
5108                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5110                                      "MatConvert_MPIAIJ_MPICRL",
5111                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5112   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5113                                      "MatConvert_MPIAIJ_MPISBAIJ",
5114                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5115   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5116                                      "MatMatMult_MPIDense_MPIAIJ",
5117                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5118   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5119                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5120                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5121   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5122                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5123                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5124   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5125   PetscFunctionReturn(0);
5126 }
5127 EXTERN_C_END
5128 
5129 #undef __FUNCT__
5130 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5131 /*@
5132      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5133          and "off-diagonal" part of the matrix in CSR format.
5134 
5135    Collective on MPI_Comm
5136 
5137    Input Parameters:
5138 +  comm - MPI communicator
5139 .  m - number of local rows (Cannot be PETSC_DECIDE)
5140 .  n - This value should be the same as the local size used in creating the
5141        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5142        calculated if N is given). For square matrices n is almost always m.
5143 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5144 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5145 .   i - row indices for "diagonal" portion of matrix
5146 .   j - column indices
5147 .   a - matrix values
5148 .   oi - row indices for "off-diagonal" portion of matrix
5149 .   oj - column indices
5150 -   oa - matrix values
5151 
5152    Output Parameter:
5153 .   mat - the matrix
5154 
5155    Level: advanced
5156 
5157    Notes:
5158        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5159 
5160        The i and j indices are 0 based
5161 
5162        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5163 
5164        This sets local rows and cannot be used to set off-processor values.
5165 
5166        You cannot later use MatSetValues() to change values in this matrix.
5167 
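   Example usage (a minimal sketch; m, n and the six CSR arrays i,j,a,oi,oj,oa are assumed to
   have been built by the caller as described above):

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     /* the arrays are not copied, so they must remain valid while A exists */
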
5168 .keywords: matrix, aij, compressed row, sparse, parallel
5169 
5170 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5171           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5172 @*/
5173 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5174 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5175 {
5176   PetscErrorCode ierr;
5177   Mat_MPIAIJ     *maij;
5178 
5179   PetscFunctionBegin;
5180   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5181   if (i[0]) {
5182     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5183   }
5184   if (oi[0]) {
5185     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5186   }
5187   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5188   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5189   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5190   maij = (Mat_MPIAIJ*) (*mat)->data;
5191   maij->donotstash     = PETSC_TRUE;
5192   (*mat)->preallocated = PETSC_TRUE;
5193 
5194   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5195   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5196   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5197   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5198 
5199   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5200   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5201 
5202   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5203   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5204   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5205   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5206 
5207   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5208   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5209   PetscFunctionReturn(0);
5210 }
5211 
5212 /*
5213     Special version for direct calls from Fortran
5214 */
5215 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5216 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5217 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5218 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5219 #endif
5220 
5221 /* Change these macros so they can be used in a void function */
5222 #undef CHKERRQ
5223 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5224 #undef SETERRQ2
5225 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5226 #undef SETERRQ
5227 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5228 
5229 EXTERN_C_BEGIN
5230 #undef __FUNCT__
5231 #define __FUNCT__ "matsetvaluesmpiaij_"
5232 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5233 {
5234   Mat             mat = *mmat;
5235   PetscInt        m = *mm, n = *mn;
5236   InsertMode      addv = *maddv;
5237   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5238   PetscScalar     value;
5239   PetscErrorCode  ierr;
5240 
5241   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5242   if (mat->insertmode == NOT_SET_VALUES) {
5243     mat->insertmode = addv;
5244   }
5245 #if defined(PETSC_USE_DEBUG)
5246   else if (mat->insertmode != addv) {
5247     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5248   }
5249 #endif
5250   {
5251   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5252   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5253   PetscTruth      roworiented = aij->roworiented;
5254 
5255   /* Some Variables required in the macro */
5256   Mat             A = aij->A;
5257   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5258   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5259   MatScalar       *aa = a->a;
5260   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5261   Mat             B = aij->B;
5262   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5263   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5264   MatScalar       *ba = b->a;
5265 
5266   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5267   PetscInt        nonew = a->nonew;
5268   MatScalar       *ap1,*ap2;
5269 
5270   PetscFunctionBegin;
5271   for (i=0; i<m; i++) {
5272     if (im[i] < 0) continue;
5273 #if defined(PETSC_USE_DEBUG)
5274     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5275 #endif
5276     if (im[i] >= rstart && im[i] < rend) {
5277       row      = im[i] - rstart;
5278       lastcol1 = -1;
5279       rp1      = aj + ai[row];
5280       ap1      = aa + ai[row];
5281       rmax1    = aimax[row];
5282       nrow1    = ailen[row];
5283       low1     = 0;
5284       high1    = nrow1;
5285       lastcol2 = -1;
5286       rp2      = bj + bi[row];
5287       ap2      = ba + bi[row];
5288       rmax2    = bimax[row];
5289       nrow2    = bilen[row];
5290       low2     = 0;
5291       high2    = nrow2;
5292 
5293       for (j=0; j<n; j++) {
5294         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5295         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5296         if (in[j] >= cstart && in[j] < cend){
5297           col = in[j] - cstart;
5298           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5299         } else if (in[j] < 0) continue;
5300 #if defined(PETSC_USE_DEBUG)
5301         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5302 #endif
5303         else {
5304           if (mat->was_assembled) {
5305             if (!aij->colmap) {
5306               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5307             }
5308 #if defined (PETSC_USE_CTABLE)
5309             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5310 	    col--;
5311 #else
5312             col = aij->colmap[in[j]] - 1;
5313 #endif
5314             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5315               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5316               col =  in[j];
5317               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5318               B = aij->B;
5319               b = (Mat_SeqAIJ*)B->data;
5320               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5321               rp2      = bj + bi[row];
5322               ap2      = ba + bi[row];
5323               rmax2    = bimax[row];
5324               nrow2    = bilen[row];
5325               low2     = 0;
5326               high2    = nrow2;
5327               bm       = aij->B->rmap->n;
5328               ba = b->a;
5329             }
5330           } else col = in[j];
5331           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5332         }
5333       }
5334     } else {
5335       if (!aij->donotstash) {
5336         if (roworiented) {
5337           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5338         } else {
5339           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5340         }
5341       }
5342     }
5343   }}
5344   PetscFunctionReturnVoid();
5345 }
5346 EXTERN_C_END
5347 
5348