xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1be63e664250e34a939e9e0d4248cc110df2bc30)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine number diagonal and off-diagonal counts */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine number diagonal and off-diagonal counts */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0*/
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it it is not scalable (each processor
181 has an order N integer array but is fast to acess.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some Variables required in the macro */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   if (v) PetscValidScalarPointer(v,6);
331   for (i=0; i<m; i++) {
332     if (im[i] < 0) continue;
333 #if defined(PETSC_USE_DEBUG)
334     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
335 #endif
336     if (im[i] >= rstart && im[i] < rend) {
337       row      = im[i] - rstart;
338       lastcol1 = -1;
339       rp1      = aj + ai[row];
340       ap1      = aa + ai[row];
341       rmax1    = aimax[row];
342       nrow1    = ailen[row];
343       low1     = 0;
344       high1    = nrow1;
345       lastcol2 = -1;
346       rp2      = bj + bi[row];
347       ap2      = ba + bi[row];
348       rmax2    = bimax[row];
349       nrow2    = bilen[row];
350       low2     = 0;
351       high2    = nrow2;
352 
353       for (j=0; j<n; j++) {
354         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
355         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
356         if (in[j] >= cstart && in[j] < cend){
357           col = in[j] - cstart;
358           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
359         } else if (in[j] < 0) continue;
360 #if defined(PETSC_USE_DEBUG)
361         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
362 #endif
363         else {
364           if (mat->was_assembled) {
365             if (!aij->colmap) {
366               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
367             }
368 #if defined (PETSC_USE_CTABLE)
369             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
370 	    col--;
371 #else
372             col = aij->colmap[in[j]] - 1;
373 #endif
374             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
375               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
376               col =  in[j];
377               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
378               B = aij->B;
379               b = (Mat_SeqAIJ*)B->data;
380               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
381               rp2      = bj + bi[row];
382               ap2      = ba + bi[row];
383               rmax2    = bimax[row];
384               nrow2    = bilen[row];
385               low2     = 0;
386               high2    = nrow2;
387               bm       = aij->B->rmap->n;
388               ba = b->a;
389             }
390           } else col = in[j];
391           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
392         }
393       }
394     } else {
395       if (!aij->donotstash) {
396         if (roworiented) {
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         } else {
399           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
400         }
401       }
402     }
403   }
404   PetscFunctionReturn(0);
405 }
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatGetValues_MPIAIJ"
409 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
410 {
411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
414   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
415 
416   PetscFunctionBegin;
417   for (i=0; i<m; i++) {
418     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
419     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
420     if (idxm[i] >= rstart && idxm[i] < rend) {
421       row = idxm[i] - rstart;
422       for (j=0; j<n; j++) {
423         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
424         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
425         if (idxn[j] >= cstart && idxn[j] < cend){
426           col = idxn[j] - cstart;
427           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
428         } else {
429           if (!aij->colmap) {
430             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
431           }
432 #if defined (PETSC_USE_CTABLE)
433           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
434           col --;
435 #else
436           col = aij->colmap[idxn[j]] - 1;
437 #endif
438           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
439           else {
440             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
441           }
442         }
443       }
444     } else {
445       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
446     }
447   }
448   PetscFunctionReturn(0);
449 }
450 
451 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
455 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
456 {
457   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
458   PetscErrorCode ierr;
459   PetscInt       nstash,reallocs;
460   InsertMode     addv;
461 
462   PetscFunctionBegin;
463   if (aij->donotstash) {
464     PetscFunctionReturn(0);
465   }
466 
467   /* make sure all processors are either in INSERTMODE or ADDMODE */
468   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
469   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   mat->insertmode = addv; /* in case this processor had no cache */
471 
472   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
473   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
474   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
484   PetscErrorCode ierr;
485   PetscMPIInt    n;
486   PetscInt       i,j,rstart,ncols,flg;
487   PetscInt       *row,*col;
488   PetscTruth     other_disassembled;
489   PetscScalar    *val;
490   InsertMode     addv = mat->insertmode;
491 
492   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
493   PetscFunctionBegin;
494   if (!aij->donotstash) {
495     while (1) {
496       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
497       if (!flg) break;
498 
499       for (i=0; i<n;) {
500         /* Now identify the consecutive vals belonging to the same row */
501         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
502         if (j < n) ncols = j-i;
503         else       ncols = n-i;
504         /* Now assemble all these values with a single function call */
505         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
506         i = j;
507       }
508     }
509     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
510   }
511   a->compressedrow.use     = PETSC_FALSE;
512   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
513   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
514 
515   /* determine if any processor has disassembled, if so we must
516      also disassemble ourselfs, in order that we may reassemble. */
517   /*
518      if nonzero structure of submatrix B cannot change then we know that
519      no processor disassembled thus we can skip this stuff
520   */
521   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
522     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
523     if (mat->was_assembled && !other_disassembled) {
524       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
525     }
526   }
527   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
528     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
529   }
530   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
531   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
532   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
533   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
534 
535   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
536   aij->rowvalues = 0;
537 
538   /* used by MatAXPY() */
539   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
540   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
541 
542   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
543   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
550 {
551   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
556   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatZeroRows_MPIAIJ"
562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
563 {
564   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
567   PetscInt       i,*owners = A->rmap->range;
568   PetscInt       *nprocs,j,idx,nsends,row;
569   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
570   PetscInt       *rvalues,count,base,slen,*source;
571   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
572   MPI_Comm       comm = ((PetscObject)A)->comm;
573   MPI_Request    *send_waits,*recv_waits;
574   MPI_Status     recv_status,*send_status;
575 #if defined(PETSC_DEBUG)
576   PetscTruth     found = PETSC_FALSE;
577 #endif
578 
579   PetscFunctionBegin;
580   /*  first count number of contributors to each processor */
581   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
582   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
583   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
584   j = 0;
585   for (i=0; i<N; i++) {
586     if (lastidx > (idx = rows[i])) j = 0;
587     lastidx = idx;
588     for (; j<size; j++) {
589       if (idx >= owners[j] && idx < owners[j+1]) {
590         nprocs[2*j]++;
591         nprocs[2*j+1] = 1;
592         owner[i] = j;
593 #if defined(PETSC_DEBUG)
594         found = PETSC_TRUE;
595 #endif
596         break;
597       }
598     }
599 #if defined(PETSC_DEBUG)
600     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
601     found = PETSC_FALSE;
602 #endif
603   }
604   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
605 
606   /* inform other processors of number of messages and max length*/
607   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
608 
609   /* post receives:   */
610   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
611   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
612   for (i=0; i<nrecvs; i++) {
613     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
614   }
615 
616   /* do sends:
617       1) starts[i] gives the starting index in svalues for stuff going to
618          the ith processor
619   */
620   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
621   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
622   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
623   starts[0] = 0;
624   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
625   for (i=0; i<N; i++) {
626     svalues[starts[owner[i]]++] = rows[i];
627   }
628 
629   starts[0] = 0;
630   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
631   count = 0;
632   for (i=0; i<size; i++) {
633     if (nprocs[2*i+1]) {
634       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
635     }
636   }
637   ierr = PetscFree(starts);CHKERRQ(ierr);
638 
639   base = owners[rank];
640 
641   /*  wait on receives */
642   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal allocating
674      new memory, thus calling lots of mallocs and slowing things down.
675 
676   */
677   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
678   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
679   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
680     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
681   } else if (diag != 0.0) {
682     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
683     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
684       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
685 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
686     }
687     for (i = 0; i < slen; i++) {
688       row  = lrows[i] + rstart;
689       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
690     }
691     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
692     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693   } else {
694     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
695   }
696   ierr = PetscFree(lrows);CHKERRQ(ierr);
697 
698   /* wait on sends */
699   if (nsends) {
700     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
701     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
702     ierr = PetscFree(send_status);CHKERRQ(ierr);
703   }
704   ierr = PetscFree(send_waits);CHKERRQ(ierr);
705   ierr = PetscFree(svalues);CHKERRQ(ierr);
706 
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatMult_MPIAIJ"
712 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
713 {
714   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
715   PetscErrorCode ierr;
716   PetscInt       nt;
717 
718   PetscFunctionBegin;
719   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
720   if (nt != A->cmap->n) {
721     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
722   }
723   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
724   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
725   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
726   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
732 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
733 {
734   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatMultAdd_MPIAIJ"
744 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
745 {
746   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
747   PetscErrorCode ierr;
748 
749   PetscFunctionBegin;
750   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
751   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
752   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
753   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
759 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
760 {
761   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
762   PetscErrorCode ierr;
763   PetscTruth     merged;
764 
765   PetscFunctionBegin;
766   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
767   /* do nondiagonal part */
768   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
769   if (!merged) {
770     /* send it on its way */
771     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
772     /* do local part */
773     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
774     /* receive remote parts: note this assumes the values are not actually */
775     /* added in yy until the next line, */
776     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
777   } else {
778     /* do local part */
779     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
780     /* send it on its way */
781     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
782     /* values actually were received in the Begin() but we need to call this nop */
783     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
784   }
785   PetscFunctionReturn(0);
786 }
787 
788 EXTERN_C_BEGIN
789 #undef __FUNCT__
790 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
791 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
792 {
793   MPI_Comm       comm;
794   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
795   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
796   IS             Me,Notme;
797   PetscErrorCode ierr;
798   PetscInt       M,N,first,last,*notme,i;
799   PetscMPIInt    size;
800 
801   PetscFunctionBegin;
802 
803   /* Easy test: symmetric diagonal block */
804   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
805   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
806   if (!*f) PetscFunctionReturn(0);
807   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
808   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
809   if (size == 1) PetscFunctionReturn(0);
810 
811   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
812   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
813   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
814   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
815   for (i=0; i<first; i++) notme[i] = i;
816   for (i=last; i<M; i++) notme[i-last+first] = i;
817   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
818   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
819   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
820   Aoff = Aoffs[0];
821   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
822   Boff = Boffs[0];
823   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
824   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
825   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
826   ierr = ISDestroy(Me);CHKERRQ(ierr);
827   ierr = ISDestroy(Notme);CHKERRQ(ierr);
828 
829   PetscFunctionReturn(0);
830 }
831 EXTERN_C_END
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
835 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
836 {
837   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
838   PetscErrorCode ierr;
839 
840   PetscFunctionBegin;
841   /* do nondiagonal part */
842   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
843   /* send it on its way */
844   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
845   /* do local part */
846   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
847   /* receive remote parts */
848   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
849   PetscFunctionReturn(0);
850 }
851 
852 /*
853   This only works correctly for square matrices where the subblock A->A is the
854    diagonal block
855 */
856 #undef __FUNCT__
857 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
858 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
859 {
860   PetscErrorCode ierr;
861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
862 
863   PetscFunctionBegin;
864   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
865   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
866   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
870 #undef __FUNCT__
871 #define __FUNCT__ "MatScale_MPIAIJ"
872 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
873 {
874   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
875   PetscErrorCode ierr;
876 
877   PetscFunctionBegin;
878   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
879   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
883 #undef __FUNCT__
884 #define __FUNCT__ "MatDestroy_MPIAIJ"
885 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
886 {
887   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
888   PetscErrorCode ierr;
889 
890   PetscFunctionBegin;
891 #if defined(PETSC_USE_LOG)
892   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
893 #endif
894   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
895   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
896   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
897   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
898 #if defined (PETSC_USE_CTABLE)
899   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
900 #else
901   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
902 #endif
903   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
904   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
905   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
906   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
907   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
908   ierr = PetscFree(aij);CHKERRQ(ierr);
909 
910   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
911   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
912   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
913   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatView_MPIAIJ_Binary"
924 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
925 {
926   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
927   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
928   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
929   PetscErrorCode    ierr;
930   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
931   int               fd;
932   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
933   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
934   PetscScalar       *column_values;
935 
936   PetscFunctionBegin;
937   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
938   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
939   nz   = A->nz + B->nz;
940   if (!rank) {
941     header[0] = MAT_FILE_CLASSID;
942     header[1] = mat->rmap->N;
943     header[2] = mat->cmap->N;
944     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
945     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
946     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
947     /* get largest number of rows any processor has */
948     rlen = mat->rmap->n;
949     range = mat->rmap->range;
950     for (i=1; i<size; i++) {
951       rlen = PetscMax(rlen,range[i+1] - range[i]);
952     }
953   } else {
954     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
955     rlen = mat->rmap->n;
956   }
957 
958   /* load up the local row counts */
959   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
960   for (i=0; i<mat->rmap->n; i++) {
961     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
962   }
963 
964   /* store the row lengths to the file */
965   if (!rank) {
966     MPI_Status status;
967     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
968     for (i=1; i<size; i++) {
969       rlen = range[i+1] - range[i];
970       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
971       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
972     }
973   } else {
974     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
975   }
976   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
977 
978   /* load up the local column indices */
979   nzmax = nz; /* )th processor needs space a largest processor needs */
980   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
981   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
982   cnt  = 0;
983   for (i=0; i<mat->rmap->n; i++) {
984     for (j=B->i[i]; j<B->i[i+1]; j++) {
985       if ( (col = garray[B->j[j]]) > cstart) break;
986       column_indices[cnt++] = col;
987     }
988     for (k=A->i[i]; k<A->i[i+1]; k++) {
989       column_indices[cnt++] = A->j[k] + cstart;
990     }
991     for (; j<B->i[i+1]; j++) {
992       column_indices[cnt++] = garray[B->j[j]];
993     }
994   }
995   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
996 
997   /* store the column indices to the file */
998   if (!rank) {
999     MPI_Status status;
1000     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1001     for (i=1; i<size; i++) {
1002       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1003       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1004       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1005       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1006     }
1007   } else {
1008     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1009     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1010   }
1011   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1012 
1013   /* load up the local column values */
1014   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1015   cnt  = 0;
1016   for (i=0; i<mat->rmap->n; i++) {
1017     for (j=B->i[i]; j<B->i[i+1]; j++) {
1018       if ( garray[B->j[j]] > cstart) break;
1019       column_values[cnt++] = B->a[j];
1020     }
1021     for (k=A->i[i]; k<A->i[i+1]; k++) {
1022       column_values[cnt++] = A->a[k];
1023     }
1024     for (; j<B->i[i+1]; j++) {
1025       column_values[cnt++] = B->a[j];
1026     }
1027   }
1028   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1029 
1030   /* store the column values to the file */
1031   if (!rank) {
1032     MPI_Status status;
1033     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1034     for (i=1; i<size; i++) {
1035       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1036       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1037       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1038       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1039     }
1040   } else {
1041     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1042     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1043   }
1044   ierr = PetscFree(column_values);CHKERRQ(ierr);
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1050 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1051 {
1052   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1053   PetscErrorCode    ierr;
1054   PetscMPIInt       rank = aij->rank,size = aij->size;
1055   PetscTruth        isdraw,iascii,isbinary;
1056   PetscViewer       sviewer;
1057   PetscViewerFormat format;
1058 
1059   PetscFunctionBegin;
1060   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1061   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1062   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1063   if (iascii) {
1064     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1065     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1066       MatInfo    info;
1067       PetscTruth inodes;
1068 
1069       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1070       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1072       if (!inodes) {
1073         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1074 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1075       } else {
1076         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1077 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1078       }
1079       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1080       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1081       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1082       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1083       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1084       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1085       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1086       PetscFunctionReturn(0);
1087     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1088       PetscInt   inodecount,inodelimit,*inodes;
1089       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1090       if (inodes) {
1091         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1092       } else {
1093         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1094       }
1095       PetscFunctionReturn(0);
1096     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1097       PetscFunctionReturn(0);
1098     }
1099   } else if (isbinary) {
1100     if (size == 1) {
1101       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1102       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1103     } else {
1104       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1105     }
1106     PetscFunctionReturn(0);
1107   } else if (isdraw) {
1108     PetscDraw  draw;
1109     PetscTruth isnull;
1110     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1111     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1112   }
1113 
1114   if (size == 1) {
1115     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1116     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1117   } else {
1118     /* assemble the entire matrix onto first processor. */
1119     Mat         A;
1120     Mat_SeqAIJ  *Aloc;
1121     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1122     MatScalar   *a;
1123 
1124     if (mat->rmap->N > 1024) {
1125       PetscTruth flg = PETSC_FALSE;
1126 
1127       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1128       if (!flg) {
1129         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1130       }
1131     }
1132 
1133     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1134     if (!rank) {
1135       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1136     } else {
1137       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1138     }
1139     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1140     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1141     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1142     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1143 
1144     /* copy over the A part */
1145     Aloc = (Mat_SeqAIJ*)aij->A->data;
1146     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1147     row = mat->rmap->rstart;
1148     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1149     for (i=0; i<m; i++) {
1150       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1151       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1152     }
1153     aj = Aloc->j;
1154     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1155 
1156     /* copy over the B part */
1157     Aloc = (Mat_SeqAIJ*)aij->B->data;
1158     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1159     row  = mat->rmap->rstart;
1160     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1161     ct   = cols;
1162     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1163     for (i=0; i<m; i++) {
1164       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1165       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1166     }
1167     ierr = PetscFree(ct);CHKERRQ(ierr);
1168     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1169     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1170     /*
1171        Everyone has to call to draw the matrix since the graphics waits are
1172        synchronized across all processors that share the PetscDraw object
1173     */
1174     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1175     if (!rank) {
1176       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1177       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1178     }
1179     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1180     ierr = MatDestroy(A);CHKERRQ(ierr);
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatView_MPIAIJ"
1187 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1188 {
1189   PetscErrorCode ierr;
1190   PetscTruth     iascii,isdraw,issocket,isbinary;
1191 
1192   PetscFunctionBegin;
1193   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1194   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1195   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1196   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1197   if (iascii || isdraw || isbinary || issocket) {
1198     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1199   } else {
1200     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1201   }
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatSOR_MPIAIJ"
1207 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1208 {
1209   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1210   PetscErrorCode ierr;
1211   Vec            bb1 = 0;
1212   PetscTruth     hasop;
1213 
1214   PetscFunctionBegin;
1215   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1216     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1217   }
1218 
1219   if (flag == SOR_APPLY_UPPER) {
1220     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1221     PetscFunctionReturn(0);
1222   }
1223 
1224   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1225     if (flag & SOR_ZERO_INITIAL_GUESS) {
1226       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1227       its--;
1228     }
1229 
1230     while (its--) {
1231       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1233 
1234       /* update rhs: bb1 = bb - B*x */
1235       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1236       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1237 
1238       /* local sweep */
1239       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1240     }
1241   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1242     if (flag & SOR_ZERO_INITIAL_GUESS) {
1243       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1244       its--;
1245     }
1246     while (its--) {
1247       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1248       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1249 
1250       /* update rhs: bb1 = bb - B*x */
1251       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1252       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1253 
1254       /* local sweep */
1255       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1256     }
1257   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1258     if (flag & SOR_ZERO_INITIAL_GUESS) {
1259       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1260       its--;
1261     }
1262     while (its--) {
1263       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265 
1266       /* update rhs: bb1 = bb - B*x */
1267       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1268       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1269 
1270       /* local sweep */
1271       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1272     }
1273   }  else if (flag & SOR_EISENSTAT) {
1274     Vec         xx1;
1275 
1276     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1277     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1278 
1279     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1280     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1281     if (!mat->diag) {
1282       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1283       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1284     }
1285     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1286     if (hasop) {
1287       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1288     } else {
1289       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1290     }
1291     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1292 
1293     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1294 
1295     /* local sweep */
1296     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1297     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1298     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1299   } else {
1300     SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1301   }
1302 
1303   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatPermute_MPIAIJ"
1309 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1310 {
1311   MPI_Comm       comm,pcomm;
1312   PetscInt       first,local_size,nrows;
1313   const PetscInt *rows;
1314   PetscMPIInt    size;
1315   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1316   PetscErrorCode ierr;
1317 
1318   PetscFunctionBegin;
1319   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1320   /* make a collective version of 'rowp' */
1321   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1322   if (pcomm==comm) {
1323     crowp = rowp;
1324   } else {
1325     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1326     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1327     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1328     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1329   }
1330   /* collect the global row permutation and invert it */
1331   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1332   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1333   if (pcomm!=comm) {
1334     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1335   }
1336   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1337   /* get the local target indices */
1338   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1339   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1340   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1341   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1342   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1343   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1344   /* the column permutation is so much easier;
1345      make a local version of 'colp' and invert it */
1346   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1347   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1348   if (size==1) {
1349     lcolp = colp;
1350   } else {
1351     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1352     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1353     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1354   }
1355   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1356   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1357   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1358   if (size>1) {
1359     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1360     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1361   }
1362   /* now we just get the submatrix */
1363   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1364   /* clean up */
1365   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1366   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1372 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1373 {
1374   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1375   Mat            A = mat->A,B = mat->B;
1376   PetscErrorCode ierr;
1377   PetscReal      isend[5],irecv[5];
1378 
1379   PetscFunctionBegin;
1380   info->block_size     = 1.0;
1381   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1382   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1383   isend[3] = info->memory;  isend[4] = info->mallocs;
1384   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1386   isend[3] += info->memory;  isend[4] += info->mallocs;
1387   if (flag == MAT_LOCAL) {
1388     info->nz_used      = isend[0];
1389     info->nz_allocated = isend[1];
1390     info->nz_unneeded  = isend[2];
1391     info->memory       = isend[3];
1392     info->mallocs      = isend[4];
1393   } else if (flag == MAT_GLOBAL_MAX) {
1394     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1395     info->nz_used      = irecv[0];
1396     info->nz_allocated = irecv[1];
1397     info->nz_unneeded  = irecv[2];
1398     info->memory       = irecv[3];
1399     info->mallocs      = irecv[4];
1400   } else if (flag == MAT_GLOBAL_SUM) {
1401     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1402     info->nz_used      = irecv[0];
1403     info->nz_allocated = irecv[1];
1404     info->nz_unneeded  = irecv[2];
1405     info->memory       = irecv[3];
1406     info->mallocs      = irecv[4];
1407   }
1408   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1409   info->fill_ratio_needed = 0;
1410   info->factor_mallocs    = 0;
1411 
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatSetOption_MPIAIJ"
1417 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1418 {
1419   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1420   PetscErrorCode ierr;
1421 
1422   PetscFunctionBegin;
1423   switch (op) {
1424   case MAT_NEW_NONZERO_LOCATIONS:
1425   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1426   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1427   case MAT_KEEP_NONZERO_PATTERN:
1428   case MAT_NEW_NONZERO_LOCATION_ERR:
1429   case MAT_USE_INODES:
1430   case MAT_IGNORE_ZERO_ENTRIES:
1431     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1432     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1433     break;
1434   case MAT_ROW_ORIENTED:
1435     a->roworiented = flg;
1436     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1437     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1438     break;
1439   case MAT_NEW_DIAGONALS:
1440     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1441     break;
1442   case MAT_IGNORE_OFF_PROC_ENTRIES:
1443     a->donotstash = PETSC_TRUE;
1444     break;
1445   case MAT_SYMMETRIC:
1446     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1447     break;
1448   case MAT_STRUCTURALLY_SYMMETRIC:
1449     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1450     break;
1451   case MAT_HERMITIAN:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_SYMMETRY_ETERNAL:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   default:
1458     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1459   }
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatGetRow_MPIAIJ"
1465 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1466 {
1467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1468   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1469   PetscErrorCode ierr;
1470   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1471   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1472   PetscInt       *cmap,*idx_p;
1473 
1474   PetscFunctionBegin;
1475   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1476   mat->getrowactive = PETSC_TRUE;
1477 
1478   if (!mat->rowvalues && (idx || v)) {
1479     /*
1480         allocate enough space to hold information from the longest row.
1481     */
1482     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1483     PetscInt   max = 1,tmp;
1484     for (i=0; i<matin->rmap->n; i++) {
1485       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1486       if (max < tmp) { max = tmp; }
1487     }
1488     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1489   }
1490 
1491   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1492   lrow = row - rstart;
1493 
1494   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1495   if (!v)   {pvA = 0; pvB = 0;}
1496   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1497   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1498   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1499   nztot = nzA + nzB;
1500 
1501   cmap  = mat->garray;
1502   if (v  || idx) {
1503     if (nztot) {
1504       /* Sort by increasing column numbers, assuming A and B already sorted */
1505       PetscInt imark = -1;
1506       if (v) {
1507         *v = v_p = mat->rowvalues;
1508         for (i=0; i<nzB; i++) {
1509           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1510           else break;
1511         }
1512         imark = i;
1513         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1514         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1515       }
1516       if (idx) {
1517         *idx = idx_p = mat->rowindices;
1518         if (imark > -1) {
1519           for (i=0; i<imark; i++) {
1520             idx_p[i] = cmap[cworkB[i]];
1521           }
1522         } else {
1523           for (i=0; i<nzB; i++) {
1524             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1525             else break;
1526           }
1527           imark = i;
1528         }
1529         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1530         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1531       }
1532     } else {
1533       if (idx) *idx = 0;
1534       if (v)   *v   = 0;
1535     }
1536   }
1537   *nz = nztot;
1538   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1539   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1545 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1546 {
1547   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1548 
1549   PetscFunctionBegin;
1550   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1551   aij->getrowactive = PETSC_FALSE;
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatNorm_MPIAIJ"
1557 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1558 {
1559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1560   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1561   PetscErrorCode ierr;
1562   PetscInt       i,j,cstart = mat->cmap->rstart;
1563   PetscReal      sum = 0.0;
1564   MatScalar      *v;
1565 
1566   PetscFunctionBegin;
1567   if (aij->size == 1) {
1568     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1569   } else {
1570     if (type == NORM_FROBENIUS) {
1571       v = amat->a;
1572       for (i=0; i<amat->nz; i++) {
1573 #if defined(PETSC_USE_COMPLEX)
1574         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1575 #else
1576         sum += (*v)*(*v); v++;
1577 #endif
1578       }
1579       v = bmat->a;
1580       for (i=0; i<bmat->nz; i++) {
1581 #if defined(PETSC_USE_COMPLEX)
1582         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1583 #else
1584         sum += (*v)*(*v); v++;
1585 #endif
1586       }
1587       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1588       *norm = sqrt(*norm);
1589     } else if (type == NORM_1) { /* max column norm */
1590       PetscReal *tmp,*tmp2;
1591       PetscInt  *jj,*garray = aij->garray;
1592       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1593       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1594       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1595       *norm = 0.0;
1596       v = amat->a; jj = amat->j;
1597       for (j=0; j<amat->nz; j++) {
1598         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1599       }
1600       v = bmat->a; jj = bmat->j;
1601       for (j=0; j<bmat->nz; j++) {
1602         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1603       }
1604       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1605       for (j=0; j<mat->cmap->N; j++) {
1606         if (tmp2[j] > *norm) *norm = tmp2[j];
1607       }
1608       ierr = PetscFree(tmp);CHKERRQ(ierr);
1609       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1610     } else if (type == NORM_INFINITY) { /* max row norm */
1611       PetscReal ntemp = 0.0;
1612       for (j=0; j<aij->A->rmap->n; j++) {
1613         v = amat->a + amat->i[j];
1614         sum = 0.0;
1615         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1616           sum += PetscAbsScalar(*v); v++;
1617         }
1618         v = bmat->a + bmat->i[j];
1619         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1620           sum += PetscAbsScalar(*v); v++;
1621         }
1622         if (sum > ntemp) ntemp = sum;
1623       }
1624       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1625     } else {
1626       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1627     }
1628   }
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatTranspose_MPIAIJ"
1634 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1635 {
1636   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1637   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1638   PetscErrorCode ierr;
1639   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1640   PetscInt       cstart=A->cmap->rstart,ncol;
1641   Mat            B;
1642   MatScalar      *array;
1643 
1644   PetscFunctionBegin;
1645   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1646 
1647   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1648   ai = Aloc->i; aj = Aloc->j;
1649   bi = Bloc->i; bj = Bloc->j;
1650   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1651     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1652     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1653     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1654     for (i=0; i<ai[ma]; i++){
1655       d_nnz[aj[i]] ++;
1656       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1657     }
1658 
1659     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1660     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1661     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1662     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1663     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1664   } else {
1665     B = *matout;
1666   }
1667 
1668   /* copy over the A part */
1669   array = Aloc->a;
1670   row = A->rmap->rstart;
1671   for (i=0; i<ma; i++) {
1672     ncol = ai[i+1]-ai[i];
1673     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1674     row++; array += ncol; aj += ncol;
1675   }
1676   aj = Aloc->j;
1677   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1678 
1679   /* copy over the B part */
1680   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1681   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1682   array = Bloc->a;
1683   row = A->rmap->rstart;
1684   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1685   cols_tmp = cols;
1686   for (i=0; i<mb; i++) {
1687     ncol = bi[i+1]-bi[i];
1688     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1689     row++; array += ncol; cols_tmp += ncol;
1690   }
1691   ierr = PetscFree(cols);CHKERRQ(ierr);
1692 
1693   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1695   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1696     *matout = B;
1697   } else {
1698     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1699   }
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1705 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1706 {
1707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1708   Mat            a = aij->A,b = aij->B;
1709   PetscErrorCode ierr;
1710   PetscInt       s1,s2,s3;
1711 
1712   PetscFunctionBegin;
1713   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1714   if (rr) {
1715     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1716     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1717     /* Overlap communication with computation. */
1718     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1719   }
1720   if (ll) {
1721     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1722     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1723     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1724   }
1725   /* scale  the diagonal block */
1726   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1727 
1728   if (rr) {
1729     /* Do a scatter end and then right scale the off-diagonal block */
1730     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1731     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1732   }
1733 
1734   PetscFunctionReturn(0);
1735 }
1736 
1737 #undef __FUNCT__
1738 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1739 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1740 {
1741   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1742   PetscErrorCode ierr;
1743 
1744   PetscFunctionBegin;
1745   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1746   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1747   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1748   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1749   PetscFunctionReturn(0);
1750 }
1751 #undef __FUNCT__
1752 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1753 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1754 {
1755   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1756   PetscErrorCode ierr;
1757 
1758   PetscFunctionBegin;
1759   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatEqual_MPIAIJ"
1765 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1766 {
1767   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1768   Mat            a,b,c,d;
1769   PetscTruth     flg;
1770   PetscErrorCode ierr;
1771 
1772   PetscFunctionBegin;
1773   a = matA->A; b = matA->B;
1774   c = matB->A; d = matB->B;
1775 
1776   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1777   if (flg) {
1778     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1779   }
1780   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 #undef __FUNCT__
1785 #define __FUNCT__ "MatCopy_MPIAIJ"
1786 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1787 {
1788   PetscErrorCode ierr;
1789   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1790   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1791 
1792   PetscFunctionBegin;
1793   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1794   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1795     /* because of the column compression in the off-processor part of the matrix a->B,
1796        the number of columns in a->B and b->B may be different, hence we cannot call
1797        the MatCopy() directly on the two parts. If need be, we can provide a more
1798        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1799        then copying the submatrices */
1800     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1801   } else {
1802     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1803     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1804   }
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1810 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1811 {
1812   PetscErrorCode ierr;
1813 
1814   PetscFunctionBegin;
1815   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatAXPY_MPIAIJ"
1821 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1822 {
1823   PetscErrorCode ierr;
1824   PetscInt       i;
1825   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1826   PetscBLASInt   bnz,one=1;
1827   Mat_SeqAIJ     *x,*y;
1828 
1829   PetscFunctionBegin;
1830   if (str == SAME_NONZERO_PATTERN) {
1831     PetscScalar alpha = a;
1832     x = (Mat_SeqAIJ *)xx->A->data;
1833     y = (Mat_SeqAIJ *)yy->A->data;
1834     bnz = PetscBLASIntCast(x->nz);
1835     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1836     x = (Mat_SeqAIJ *)xx->B->data;
1837     y = (Mat_SeqAIJ *)yy->B->data;
1838     bnz = PetscBLASIntCast(x->nz);
1839     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1840   } else if (str == SUBSET_NONZERO_PATTERN) {
1841     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1842 
1843     x = (Mat_SeqAIJ *)xx->B->data;
1844     y = (Mat_SeqAIJ *)yy->B->data;
1845     if (y->xtoy && y->XtoY != xx->B) {
1846       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1847       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1848     }
1849     if (!y->xtoy) { /* get xtoy */
1850       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1851       y->XtoY = xx->B;
1852       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1853     }
1854     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1855   } else {
1856     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1857   }
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatConjugate_MPIAIJ"
1865 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1866 {
1867 #if defined(PETSC_USE_COMPLEX)
1868   PetscErrorCode ierr;
1869   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1870 
1871   PetscFunctionBegin;
1872   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1873   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1874 #else
1875   PetscFunctionBegin;
1876 #endif
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatRealPart_MPIAIJ"
1882 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1883 {
1884   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1885   PetscErrorCode ierr;
1886 
1887   PetscFunctionBegin;
1888   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1889   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1895 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1896 {
1897   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1898   PetscErrorCode ierr;
1899 
1900   PetscFunctionBegin;
1901   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1902   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 #ifdef PETSC_HAVE_PBGL
1907 
1908 #include <boost/parallel/mpi/bsp_process_group.hpp>
1909 #include <boost/graph/distributed/ilu_default_graph.hpp>
1910 #include <boost/graph/distributed/ilu_0_block.hpp>
1911 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1912 #include <boost/graph/distributed/petsc/interface.hpp>
1913 #include <boost/multi_array.hpp>
1914 #include <boost/parallel/distributed_property_map->hpp>
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1918 /*
1919   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1920 */
1921 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1922 {
1923   namespace petsc = boost::distributed::petsc;
1924 
1925   namespace graph_dist = boost::graph::distributed;
1926   using boost::graph::distributed::ilu_default::process_group_type;
1927   using boost::graph::ilu_permuted;
1928 
1929   PetscTruth      row_identity, col_identity;
1930   PetscContainer  c;
1931   PetscInt        m, n, M, N;
1932   PetscErrorCode  ierr;
1933 
1934   PetscFunctionBegin;
1935   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1936   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1937   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1938   if (!row_identity || !col_identity) {
1939     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1940   }
1941 
1942   process_group_type pg;
1943   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1944   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1945   lgraph_type&   level_graph = *lgraph_p;
1946   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1947 
1948   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1949   ilu_permuted(level_graph);
1950 
1951   /* put together the new matrix */
1952   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1953   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1954   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1955   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1956   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1957   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1958   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1959 
1960   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1961   ierr = PetscContainerSetPointer(c, lgraph_p);
1962   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 #undef __FUNCT__
1967 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1968 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1969 {
1970   PetscFunctionBegin;
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 #undef __FUNCT__
1975 #define __FUNCT__ "MatSolve_MPIAIJ"
1976 /*
1977   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1978 */
1979 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1980 {
1981   namespace graph_dist = boost::graph::distributed;
1982 
1983   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1984   lgraph_type*   lgraph_p;
1985   PetscContainer c;
1986   PetscErrorCode ierr;
1987 
1988   PetscFunctionBegin;
1989   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1990   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1991   ierr = VecCopy(b, x);CHKERRQ(ierr);
1992 
1993   PetscScalar* array_x;
1994   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1995   PetscInt sx;
1996   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1997 
1998   PetscScalar* array_b;
1999   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2000   PetscInt sb;
2001   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2002 
2003   lgraph_type&   level_graph = *lgraph_p;
2004   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2005 
2006   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2007   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2008                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2009 
2010   typedef boost::iterator_property_map<array_ref_type::iterator,
2011                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2012   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2013                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2014 
2015   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2016 
2017   PetscFunctionReturn(0);
2018 }
2019 #endif
2020 
2021 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2022   PetscInt       nzlocal,nsends,nrecvs;
2023   PetscMPIInt    *send_rank,*recv_rank;
2024   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2025   PetscScalar    *sbuf_a,**rbuf_a;
2026   PetscErrorCode (*MatDestroy)(Mat);
2027 } Mat_Redundant;
2028 
2029 #undef __FUNCT__
2030 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2031 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2032 {
2033   PetscErrorCode       ierr;
2034   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2035   PetscInt             i;
2036 
2037   PetscFunctionBegin;
2038   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2039   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2040   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2041   for (i=0; i<redund->nrecvs; i++){
2042     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2043     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2044   }
2045   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2046   ierr = PetscFree(redund);CHKERRQ(ierr);
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 #undef __FUNCT__
2051 #define __FUNCT__ "MatDestroy_MatRedundant"
2052 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2053 {
2054   PetscErrorCode  ierr;
2055   PetscContainer  container;
2056   Mat_Redundant   *redund=PETSC_NULL;
2057 
2058   PetscFunctionBegin;
2059   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2060   if (container) {
2061     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2062   } else {
2063     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2064   }
2065   A->ops->destroy = redund->MatDestroy;
2066   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2067   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2068   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2074 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2075 {
2076   PetscMPIInt    rank,size;
2077   MPI_Comm       comm=((PetscObject)mat)->comm;
2078   PetscErrorCode ierr;
2079   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2080   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2081   PetscInt       *rowrange=mat->rmap->range;
2082   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2083   Mat            A=aij->A,B=aij->B,C=*matredundant;
2084   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2085   PetscScalar    *sbuf_a;
2086   PetscInt       nzlocal=a->nz+b->nz;
2087   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2088   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2089   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2090   MatScalar      *aworkA,*aworkB;
2091   PetscScalar    *vals;
2092   PetscMPIInt    tag1,tag2,tag3,imdex;
2093   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2094                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2095   MPI_Status     recv_status,*send_status;
2096   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2097   PetscInt       **rbuf_j=PETSC_NULL;
2098   PetscScalar    **rbuf_a=PETSC_NULL;
2099   Mat_Redundant  *redund=PETSC_NULL;
2100   PetscContainer container;
2101 
2102   PetscFunctionBegin;
2103   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2104   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2105 
2106   if (reuse == MAT_REUSE_MATRIX) {
2107     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2108     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2109     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2110     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2111     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2112     if (container) {
2113       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2114     } else {
2115       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2116     }
2117     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2118 
2119     nsends    = redund->nsends;
2120     nrecvs    = redund->nrecvs;
2121     send_rank = redund->send_rank;
2122     recv_rank = redund->recv_rank;
2123     sbuf_nz   = redund->sbuf_nz;
2124     rbuf_nz   = redund->rbuf_nz;
2125     sbuf_j    = redund->sbuf_j;
2126     sbuf_a    = redund->sbuf_a;
2127     rbuf_j    = redund->rbuf_j;
2128     rbuf_a    = redund->rbuf_a;
2129   }
2130 
2131   if (reuse == MAT_INITIAL_MATRIX){
2132     PetscMPIInt  subrank,subsize;
2133     PetscInt     nleftover,np_subcomm;
2134     /* get the destination processors' id send_rank, nsends and nrecvs */
2135     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2136     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2137     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2138     np_subcomm = size/nsubcomm;
2139     nleftover  = size - nsubcomm*np_subcomm;
2140     nsends = 0; nrecvs = 0;
2141     for (i=0; i<size; i++){ /* i=rank*/
2142       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2143         send_rank[nsends] = i; nsends++;
2144         recv_rank[nrecvs++] = i;
2145       }
2146     }
2147     if (rank >= size - nleftover){/* this proc is a leftover processor */
2148       i = size-nleftover-1;
2149       j = 0;
2150       while (j < nsubcomm - nleftover){
2151         send_rank[nsends++] = i;
2152         i--; j++;
2153       }
2154     }
2155 
2156     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2157       for (i=0; i<nleftover; i++){
2158         recv_rank[nrecvs++] = size-nleftover+i;
2159       }
2160     }
2161 
2162     /* allocate sbuf_j, sbuf_a */
2163     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2164     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2165     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2166   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2167 
2168   /* copy mat's local entries into the buffers */
2169   if (reuse == MAT_INITIAL_MATRIX){
2170     rownz_max = 0;
2171     rptr = sbuf_j;
2172     cols = sbuf_j + rend-rstart + 1;
2173     vals = sbuf_a;
2174     rptr[0] = 0;
2175     for (i=0; i<rend-rstart; i++){
2176       row = i + rstart;
2177       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2178       ncols  = nzA + nzB;
2179       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2180       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2181       /* load the column indices for this row into cols */
2182       lwrite = 0;
2183       for (l=0; l<nzB; l++) {
2184         if ((ctmp = bmap[cworkB[l]]) < cstart){
2185           vals[lwrite]   = aworkB[l];
2186           cols[lwrite++] = ctmp;
2187         }
2188       }
2189       for (l=0; l<nzA; l++){
2190         vals[lwrite]   = aworkA[l];
2191         cols[lwrite++] = cstart + cworkA[l];
2192       }
2193       for (l=0; l<nzB; l++) {
2194         if ((ctmp = bmap[cworkB[l]]) >= cend){
2195           vals[lwrite]   = aworkB[l];
2196           cols[lwrite++] = ctmp;
2197         }
2198       }
2199       vals += ncols;
2200       cols += ncols;
2201       rptr[i+1] = rptr[i] + ncols;
2202       if (rownz_max < ncols) rownz_max = ncols;
2203     }
2204     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2205   } else { /* only copy matrix values into sbuf_a */
2206     rptr = sbuf_j;
2207     vals = sbuf_a;
2208     rptr[0] = 0;
2209     for (i=0; i<rend-rstart; i++){
2210       row = i + rstart;
2211       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2212       ncols  = nzA + nzB;
2213       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2214       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2215       lwrite = 0;
2216       for (l=0; l<nzB; l++) {
2217         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2218       }
2219       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2220       for (l=0; l<nzB; l++) {
2221         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2222       }
2223       vals += ncols;
2224       rptr[i+1] = rptr[i] + ncols;
2225     }
2226   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2227 
2228   /* send nzlocal to others, and recv other's nzlocal */
2229   /*--------------------------------------------------*/
2230   if (reuse == MAT_INITIAL_MATRIX){
2231     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2232     s_waits2 = s_waits3 + nsends;
2233     s_waits1 = s_waits2 + nsends;
2234     r_waits1 = s_waits1 + nsends;
2235     r_waits2 = r_waits1 + nrecvs;
2236     r_waits3 = r_waits2 + nrecvs;
2237   } else {
2238     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2239     r_waits3 = s_waits3 + nsends;
2240   }
2241 
2242   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2243   if (reuse == MAT_INITIAL_MATRIX){
2244     /* get new tags to keep the communication clean */
2245     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2246     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2247     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2248 
2249     /* post receives of other's nzlocal */
2250     for (i=0; i<nrecvs; i++){
2251       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2252     }
2253     /* send nzlocal to others */
2254     for (i=0; i<nsends; i++){
2255       sbuf_nz[i] = nzlocal;
2256       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2257     }
2258     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2259     count = nrecvs;
2260     while (count) {
2261       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2262       recv_rank[imdex] = recv_status.MPI_SOURCE;
2263       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2264       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2265 
2266       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2267       rbuf_nz[imdex] += i + 2;
2268       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2269       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2270       count--;
2271     }
2272     /* wait on sends of nzlocal */
2273     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2274     /* send mat->i,j to others, and recv from other's */
2275     /*------------------------------------------------*/
2276     for (i=0; i<nsends; i++){
2277       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2278       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2279     }
2280     /* wait on receives of mat->i,j */
2281     /*------------------------------*/
2282     count = nrecvs;
2283     while (count) {
2284       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2285       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2286       count--;
2287     }
2288     /* wait on sends of mat->i,j */
2289     /*---------------------------*/
2290     if (nsends) {
2291       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2292     }
2293   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2294 
2295   /* post receives, send and receive mat->a */
2296   /*----------------------------------------*/
2297   for (imdex=0; imdex<nrecvs; imdex++) {
2298     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2299   }
2300   for (i=0; i<nsends; i++){
2301     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2302   }
2303   count = nrecvs;
2304   while (count) {
2305     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2306     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2307     count--;
2308   }
2309   if (nsends) {
2310     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2311   }
2312 
2313   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2314 
2315   /* create redundant matrix */
2316   /*-------------------------*/
2317   if (reuse == MAT_INITIAL_MATRIX){
2318     /* compute rownz_max for preallocation */
2319     for (imdex=0; imdex<nrecvs; imdex++){
2320       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2321       rptr = rbuf_j[imdex];
2322       for (i=0; i<j; i++){
2323         ncols = rptr[i+1] - rptr[i];
2324         if (rownz_max < ncols) rownz_max = ncols;
2325       }
2326     }
2327 
2328     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2329     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2330     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2331     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2332     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2333   } else {
2334     C = *matredundant;
2335   }
2336 
2337   /* insert local matrix entries */
2338   rptr = sbuf_j;
2339   cols = sbuf_j + rend-rstart + 1;
2340   vals = sbuf_a;
2341   for (i=0; i<rend-rstart; i++){
2342     row   = i + rstart;
2343     ncols = rptr[i+1] - rptr[i];
2344     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2345     vals += ncols;
2346     cols += ncols;
2347   }
2348   /* insert received matrix entries */
2349   for (imdex=0; imdex<nrecvs; imdex++){
2350     rstart = rowrange[recv_rank[imdex]];
2351     rend   = rowrange[recv_rank[imdex]+1];
2352     rptr = rbuf_j[imdex];
2353     cols = rbuf_j[imdex] + rend-rstart + 1;
2354     vals = rbuf_a[imdex];
2355     for (i=0; i<rend-rstart; i++){
2356       row   = i + rstart;
2357       ncols = rptr[i+1] - rptr[i];
2358       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2359       vals += ncols;
2360       cols += ncols;
2361     }
2362   }
2363   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2364   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2365   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2366   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2367   if (reuse == MAT_INITIAL_MATRIX){
2368     PetscContainer container;
2369     *matredundant = C;
2370     /* create a supporting struct and attach it to C for reuse */
2371     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2372     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2373     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2374     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2375     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2376 
2377     redund->nzlocal = nzlocal;
2378     redund->nsends  = nsends;
2379     redund->nrecvs  = nrecvs;
2380     redund->send_rank = send_rank;
2381     redund->recv_rank = recv_rank;
2382     redund->sbuf_nz = sbuf_nz;
2383     redund->rbuf_nz = rbuf_nz;
2384     redund->sbuf_j  = sbuf_j;
2385     redund->sbuf_a  = sbuf_a;
2386     redund->rbuf_j  = rbuf_j;
2387     redund->rbuf_a  = rbuf_a;
2388 
2389     redund->MatDestroy = C->ops->destroy;
2390     C->ops->destroy    = MatDestroy_MatRedundant;
2391   }
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2397 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2398 {
2399   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2400   PetscErrorCode ierr;
2401   PetscInt       i,*idxb = 0;
2402   PetscScalar    *va,*vb;
2403   Vec            vtmp;
2404 
2405   PetscFunctionBegin;
2406   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2407   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2408   if (idx) {
2409     for (i=0; i<A->rmap->n; i++) {
2410       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2411     }
2412   }
2413 
2414   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2415   if (idx) {
2416     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2417   }
2418   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2419   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2420 
2421   for (i=0; i<A->rmap->n; i++){
2422     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2423       va[i] = vb[i];
2424       if (idx) idx[i] = a->garray[idxb[i]];
2425     }
2426   }
2427 
2428   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2429   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2430   if (idxb) {
2431     ierr = PetscFree(idxb);CHKERRQ(ierr);
2432   }
2433   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 #undef __FUNCT__
2438 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2439 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2440 {
2441   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2442   PetscErrorCode ierr;
2443   PetscInt       i,*idxb = 0;
2444   PetscScalar    *va,*vb;
2445   Vec            vtmp;
2446 
2447   PetscFunctionBegin;
2448   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2449   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2450   if (idx) {
2451     for (i=0; i<A->cmap->n; i++) {
2452       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2453     }
2454   }
2455 
2456   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2457   if (idx) {
2458     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2459   }
2460   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2461   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2462 
2463   for (i=0; i<A->rmap->n; i++){
2464     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2465       va[i] = vb[i];
2466       if (idx) idx[i] = a->garray[idxb[i]];
2467     }
2468   }
2469 
2470   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2471   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2472   if (idxb) {
2473     ierr = PetscFree(idxb);CHKERRQ(ierr);
2474   }
2475   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 #undef __FUNCT__
2480 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2481 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2482 {
2483   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2484   PetscInt       n      = A->rmap->n;
2485   PetscInt       cstart = A->cmap->rstart;
2486   PetscInt      *cmap   = mat->garray;
2487   PetscInt      *diagIdx, *offdiagIdx;
2488   Vec            diagV, offdiagV;
2489   PetscScalar   *a, *diagA, *offdiagA;
2490   PetscInt       r;
2491   PetscErrorCode ierr;
2492 
2493   PetscFunctionBegin;
2494   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2495   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2496   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2497   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2498   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2499   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2500   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2501   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2502   for(r = 0; r < n; ++r) {
2503     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2504       a[r]   = diagA[r];
2505       idx[r] = cstart + diagIdx[r];
2506     } else {
2507       a[r]   = offdiagA[r];
2508       idx[r] = cmap[offdiagIdx[r]];
2509     }
2510   }
2511   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2512   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2513   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2514   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2515   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2516   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2522 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2523 {
2524   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2525   PetscInt       n      = A->rmap->n;
2526   PetscInt       cstart = A->cmap->rstart;
2527   PetscInt      *cmap   = mat->garray;
2528   PetscInt      *diagIdx, *offdiagIdx;
2529   Vec            diagV, offdiagV;
2530   PetscScalar   *a, *diagA, *offdiagA;
2531   PetscInt       r;
2532   PetscErrorCode ierr;
2533 
2534   PetscFunctionBegin;
2535   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2536   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2537   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2538   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2539   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2540   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2541   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2542   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2543   for(r = 0; r < n; ++r) {
2544     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2545       a[r]   = diagA[r];
2546       idx[r] = cstart + diagIdx[r];
2547     } else {
2548       a[r]   = offdiagA[r];
2549       idx[r] = cmap[offdiagIdx[r]];
2550     }
2551   }
2552   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2553   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2554   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2555   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2556   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2557   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2563 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2564 {
2565   PetscErrorCode ierr;
2566   Mat            *dummy;
2567 
2568   PetscFunctionBegin;
2569   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2570   *newmat = *dummy;
2571   ierr = PetscFree(dummy);CHKERRQ(ierr);
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2576 /* -------------------------------------------------------------------*/
2577 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2578        MatGetRow_MPIAIJ,
2579        MatRestoreRow_MPIAIJ,
2580        MatMult_MPIAIJ,
2581 /* 4*/ MatMultAdd_MPIAIJ,
2582        MatMultTranspose_MPIAIJ,
2583        MatMultTransposeAdd_MPIAIJ,
2584 #ifdef PETSC_HAVE_PBGL
2585        MatSolve_MPIAIJ,
2586 #else
2587        0,
2588 #endif
2589        0,
2590        0,
2591 /*10*/ 0,
2592        0,
2593        0,
2594        MatSOR_MPIAIJ,
2595        MatTranspose_MPIAIJ,
2596 /*15*/ MatGetInfo_MPIAIJ,
2597        MatEqual_MPIAIJ,
2598        MatGetDiagonal_MPIAIJ,
2599        MatDiagonalScale_MPIAIJ,
2600        MatNorm_MPIAIJ,
2601 /*20*/ MatAssemblyBegin_MPIAIJ,
2602        MatAssemblyEnd_MPIAIJ,
2603        MatSetOption_MPIAIJ,
2604        MatZeroEntries_MPIAIJ,
2605 /*24*/ MatZeroRows_MPIAIJ,
2606        0,
2607 #ifdef PETSC_HAVE_PBGL
2608        0,
2609 #else
2610        0,
2611 #endif
2612        0,
2613        0,
2614 /*29*/ MatSetUpPreallocation_MPIAIJ,
2615 #ifdef PETSC_HAVE_PBGL
2616        0,
2617 #else
2618        0,
2619 #endif
2620        0,
2621        0,
2622        0,
2623 /*34*/ MatDuplicate_MPIAIJ,
2624        0,
2625        0,
2626        0,
2627        0,
2628 /*39*/ MatAXPY_MPIAIJ,
2629        MatGetSubMatrices_MPIAIJ,
2630        MatIncreaseOverlap_MPIAIJ,
2631        MatGetValues_MPIAIJ,
2632        MatCopy_MPIAIJ,
2633 /*44*/ MatGetRowMax_MPIAIJ,
2634        MatScale_MPIAIJ,
2635        0,
2636        0,
2637        0,
2638 /*49*/ MatSetBlockSize_MPIAIJ,
2639        0,
2640        0,
2641        0,
2642        0,
2643 /*54*/ MatFDColoringCreate_MPIAIJ,
2644        0,
2645        MatSetUnfactored_MPIAIJ,
2646        MatPermute_MPIAIJ,
2647        0,
2648 /*59*/ MatGetSubMatrix_MPIAIJ,
2649        MatDestroy_MPIAIJ,
2650        MatView_MPIAIJ,
2651        0,
2652        0,
2653 /*64*/ 0,
2654        0,
2655        0,
2656        0,
2657        0,
2658 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2659        MatGetRowMinAbs_MPIAIJ,
2660        0,
2661        MatSetColoring_MPIAIJ,
2662 #if defined(PETSC_HAVE_ADIC)
2663        MatSetValuesAdic_MPIAIJ,
2664 #else
2665        0,
2666 #endif
2667        MatSetValuesAdifor_MPIAIJ,
2668 /*75*/ MatFDColoringApply_AIJ,
2669        0,
2670        0,
2671        0,
2672        0,
2673 /*80*/ 0,
2674        0,
2675        0,
2676 /*83*/ MatLoad_MPIAIJ,
2677        0,
2678        0,
2679        0,
2680        0,
2681        0,
2682 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2683        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2684        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2685        MatPtAP_Basic,
2686        MatPtAPSymbolic_MPIAIJ,
2687 /*94*/ MatPtAPNumeric_MPIAIJ,
2688        0,
2689        0,
2690        0,
2691        0,
2692 /*99*/ 0,
2693        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2694        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2695        MatConjugate_MPIAIJ,
2696        0,
2697 /*104*/MatSetValuesRow_MPIAIJ,
2698        MatRealPart_MPIAIJ,
2699        MatImaginaryPart_MPIAIJ,
2700        0,
2701        0,
2702 /*109*/0,
2703        MatGetRedundantMatrix_MPIAIJ,
2704        MatGetRowMin_MPIAIJ,
2705        0,
2706        0,
2707 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2708        0,
2709        0,
2710        0,
2711        0,
2712        0
2713 };
2714 
2715 /* ----------------------------------------------------------------------------------------*/
2716 
2717 EXTERN_C_BEGIN
2718 #undef __FUNCT__
2719 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2720 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2721 {
2722   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2723   PetscErrorCode ierr;
2724 
2725   PetscFunctionBegin;
2726   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2727   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2728   PetscFunctionReturn(0);
2729 }
2730 EXTERN_C_END
2731 
2732 EXTERN_C_BEGIN
2733 #undef __FUNCT__
2734 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2735 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2736 {
2737   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2738   PetscErrorCode ierr;
2739 
2740   PetscFunctionBegin;
2741   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2742   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2743   PetscFunctionReturn(0);
2744 }
2745 EXTERN_C_END
2746 
2747 #include "petscpc.h"
2748 EXTERN_C_BEGIN
2749 #undef __FUNCT__
2750 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2751 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2752 {
2753   Mat_MPIAIJ     *b;
2754   PetscErrorCode ierr;
2755   PetscInt       i;
2756 
2757   PetscFunctionBegin;
2758   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2759   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2760   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2761   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2762 
2763   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2764   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2765   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2766   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2767   if (d_nnz) {
2768     for (i=0; i<B->rmap->n; i++) {
2769       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2770     }
2771   }
2772   if (o_nnz) {
2773     for (i=0; i<B->rmap->n; i++) {
2774       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2775     }
2776   }
2777   b = (Mat_MPIAIJ*)B->data;
2778 
2779   if (!B->preallocated) {
2780     /* Explicitly create 2 MATSEQAIJ matrices. */
2781     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2782     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2783     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2784     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2785     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2786     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2787     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2788     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2789   }
2790 
2791   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2792   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2793   B->preallocated = PETSC_TRUE;
2794   PetscFunctionReturn(0);
2795 }
2796 EXTERN_C_END
2797 
2798 #undef __FUNCT__
2799 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2800 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2801 {
2802   Mat            mat;
2803   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2804   PetscErrorCode ierr;
2805 
2806   PetscFunctionBegin;
2807   *newmat       = 0;
2808   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2809   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2810   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2811   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2812   a    = (Mat_MPIAIJ*)mat->data;
2813 
2814   mat->factortype    = matin->factortype;
2815   mat->rmap->bs      = matin->rmap->bs;
2816   mat->assembled    = PETSC_TRUE;
2817   mat->insertmode   = NOT_SET_VALUES;
2818   mat->preallocated = PETSC_TRUE;
2819 
2820   a->size           = oldmat->size;
2821   a->rank           = oldmat->rank;
2822   a->donotstash     = oldmat->donotstash;
2823   a->roworiented    = oldmat->roworiented;
2824   a->rowindices     = 0;
2825   a->rowvalues      = 0;
2826   a->getrowactive   = PETSC_FALSE;
2827 
2828   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2829   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2830 
2831   if (oldmat->colmap) {
2832 #if defined (PETSC_USE_CTABLE)
2833     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2834 #else
2835     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2836     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2837     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2838 #endif
2839   } else a->colmap = 0;
2840   if (oldmat->garray) {
2841     PetscInt len;
2842     len  = oldmat->B->cmap->n;
2843     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2844     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2845     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2846   } else a->garray = 0;
2847 
2848   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2849   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2850   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2851   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2852   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2853   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2854   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2855   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2856   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2857   *newmat = mat;
2858   PetscFunctionReturn(0);
2859 }
2860 
2861 #undef __FUNCT__
2862 #define __FUNCT__ "MatLoad_MPIAIJ"
2863 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2864 {
2865   Mat            A;
2866   PetscScalar    *vals,*svals;
2867   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2868   MPI_Status     status;
2869   PetscErrorCode ierr;
2870   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2871   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2872   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2873   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2874   PetscInt       cend,cstart,n,*rowners;
2875   int            fd;
2876 
2877   PetscFunctionBegin;
2878   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2879   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2880   if (!rank) {
2881     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2882     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2883     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2884   }
2885 
2886   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2887   M = header[1]; N = header[2];
2888   /* determine ownership of all rows */
2889   m    = M/size + ((M % size) > rank);
2890   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2891   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2892 
2893   /* First process needs enough room for process with most rows */
2894   if (!rank) {
2895     mmax       = rowners[1];
2896     for (i=2; i<size; i++) {
2897       mmax = PetscMax(mmax,rowners[i]);
2898     }
2899   } else mmax = m;
2900 
2901   rowners[0] = 0;
2902   for (i=2; i<=size; i++) {
2903     rowners[i] += rowners[i-1];
2904   }
2905   rstart = rowners[rank];
2906   rend   = rowners[rank+1];
2907 
2908   /* distribute row lengths to all processors */
2909   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2910   if (!rank) {
2911     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2912     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2913     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2914     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2915     for (j=0; j<m; j++) {
2916       procsnz[0] += ourlens[j];
2917     }
2918     for (i=1; i<size; i++) {
2919       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2920       /* calculate the number of nonzeros on each processor */
2921       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2922         procsnz[i] += rowlengths[j];
2923       }
2924       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2925       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2926     }
2927     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2928   } else {
2929     mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr);
2930     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2931   }
2932 
2933   if (!rank) {
2934     /* determine max buffer needed and allocate it */
2935     maxnz = 0;
2936     for (i=0; i<size; i++) {
2937       maxnz = PetscMax(maxnz,procsnz[i]);
2938     }
2939     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2940 
2941     /* read in my part of the matrix column indices  */
2942     nz   = procsnz[0];
2943     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2944     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2945 
2946     /* read in every one elses and ship off */
2947     for (i=1; i<size; i++) {
2948       nz     = procsnz[i];
2949       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2950       mpicnt = PetscMPIIntCast(nz);
2951       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2952     }
2953     ierr = PetscFree(cols);CHKERRQ(ierr);
2954   } else {
2955     /* determine buffer space needed for message */
2956     nz = 0;
2957     for (i=0; i<m; i++) {
2958       nz += ourlens[i];
2959     }
2960     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2961 
2962     /* receive message of column indices*/
2963     mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr);
2964     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2965     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2966     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2967     else if (mpimaxnz < 0) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2968     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2969   }
2970 
2971   /* determine column ownership if matrix is not square */
2972   if (N != M) {
2973     n      = N/size + ((N % size) > rank);
2974     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2975     cstart = cend - n;
2976   } else {
2977     cstart = rstart;
2978     cend   = rend;
2979     n      = cend - cstart;
2980   }
2981 
2982   /* loop over local rows, determining number of off diagonal entries */
2983   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2984   jj = 0;
2985   for (i=0; i<m; i++) {
2986     for (j=0; j<ourlens[i]; j++) {
2987       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2988       jj++;
2989     }
2990   }
2991 
2992   /* create our matrix */
2993   for (i=0; i<m; i++) {
2994     ourlens[i] -= offlens[i];
2995   }
2996   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2997   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2998   ierr = MatSetType(A,type);CHKERRQ(ierr);
2999   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3000 
3001   for (i=0; i<m; i++) {
3002     ourlens[i] += offlens[i];
3003   }
3004 
3005   if (!rank) {
3006     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3007 
3008     /* read in my part of the matrix numerical values  */
3009     nz   = procsnz[0];
3010     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3011 
3012     /* insert into matrix */
3013     jj      = rstart;
3014     smycols = mycols;
3015     svals   = vals;
3016     for (i=0; i<m; i++) {
3017       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3018       smycols += ourlens[i];
3019       svals   += ourlens[i];
3020       jj++;
3021     }
3022 
3023     /* read in other processors and ship out */
3024     for (i=1; i<size; i++) {
3025       nz     = procsnz[i];
3026       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3027       mpicnt = PetscMPIIntCast(nz);
3028       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3029     }
3030     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3031   } else {
3032     /* receive numeric values */
3033     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3034 
3035     /* receive message of values*/
3036     mpicnt = PetscMPIIntCast(nz);
3037     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3038     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3039     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3040     else if (mpimaxnz < 0) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3041     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3042     /* insert into matrix */
3043     jj      = rstart;
3044     smycols = mycols;
3045     svals   = vals;
3046     for (i=0; i<m; i++) {
3047       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3048       smycols += ourlens[i];
3049       svals   += ourlens[i];
3050       jj++;
3051     }
3052   }
3053   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3054   ierr = PetscFree(vals);CHKERRQ(ierr);
3055   ierr = PetscFree(mycols);CHKERRQ(ierr);
3056   ierr = PetscFree(rowners);CHKERRQ(ierr);
3057 
3058   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3059   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3060   *newmat = A;
3061   PetscFunctionReturn(0);
3062 }
3063 
3064 #undef __FUNCT__
3065 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3066 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3067 {
3068   PetscErrorCode ierr;
3069   IS             iscol_local;
3070   PetscInt       csize;
3071 
3072   PetscFunctionBegin;
3073   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3074   if (call == MAT_REUSE_MATRIX) {
3075     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3076     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3077   } else {
3078     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3079   }
3080   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3081   if (call == MAT_INITIAL_MATRIX) {
3082     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3083     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3084   }
3085   PetscFunctionReturn(0);
3086 }
3087 
3088 #undef __FUNCT__
3089 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3090 /*
3091     Not great since it makes two copies of the submatrix, first an SeqAIJ
3092   in local and then by concatenating the local matrices the end result.
3093   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3094 
3095   Note: This requires a sequential iscol with all indices.
3096 */
3097 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3098 {
3099   PetscErrorCode ierr;
3100   PetscMPIInt    rank,size;
3101   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3102   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3103   Mat            *local,M,Mreuse;
3104   MatScalar      *vwork,*aa;
3105   MPI_Comm       comm = ((PetscObject)mat)->comm;
3106   Mat_SeqAIJ     *aij;
3107 
3108 
3109   PetscFunctionBegin;
3110   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3111   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3112 
3113   if (call ==  MAT_REUSE_MATRIX) {
3114     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3115     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3116     local = &Mreuse;
3117     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3118   } else {
3119     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3120     Mreuse = *local;
3121     ierr   = PetscFree(local);CHKERRQ(ierr);
3122   }
3123 
3124   /*
3125       m - number of local rows
3126       n - number of columns (same on all processors)
3127       rstart - first row in new global matrix generated
3128   */
3129   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3130   if (call == MAT_INITIAL_MATRIX) {
3131     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3132     ii  = aij->i;
3133     jj  = aij->j;
3134 
3135     /*
3136         Determine the number of non-zeros in the diagonal and off-diagonal
3137         portions of the matrix in order to do correct preallocation
3138     */
3139 
3140     /* first get start and end of "diagonal" columns */
3141     if (csize == PETSC_DECIDE) {
3142       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3143       if (mglobal == n) { /* square matrix */
3144 	nlocal = m;
3145       } else {
3146         nlocal = n/size + ((n % size) > rank);
3147       }
3148     } else {
3149       nlocal = csize;
3150     }
3151     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3152     rstart = rend - nlocal;
3153     if (rank == size - 1 && rend != n) {
3154       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3155     }
3156 
3157     /* next, compute all the lengths */
3158     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3159     olens = dlens + m;
3160     for (i=0; i<m; i++) {
3161       jend = ii[i+1] - ii[i];
3162       olen = 0;
3163       dlen = 0;
3164       for (j=0; j<jend; j++) {
3165         if (*jj < rstart || *jj >= rend) olen++;
3166         else dlen++;
3167         jj++;
3168       }
3169       olens[i] = olen;
3170       dlens[i] = dlen;
3171     }
3172     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3173     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3174     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3175     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3176     ierr = PetscFree(dlens);CHKERRQ(ierr);
3177   } else {
3178     PetscInt ml,nl;
3179 
3180     M = *newmat;
3181     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3182     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3183     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3184     /*
3185          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3186        rather than the slower MatSetValues().
3187     */
3188     M->was_assembled = PETSC_TRUE;
3189     M->assembled     = PETSC_FALSE;
3190   }
3191   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3192   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3193   ii  = aij->i;
3194   jj  = aij->j;
3195   aa  = aij->a;
3196   for (i=0; i<m; i++) {
3197     row   = rstart + i;
3198     nz    = ii[i+1] - ii[i];
3199     cwork = jj;     jj += nz;
3200     vwork = aa;     aa += nz;
3201     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3202   }
3203 
3204   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3205   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3206   *newmat = M;
3207 
3208   /* save submatrix used in processor for next request */
3209   if (call ==  MAT_INITIAL_MATRIX) {
3210     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3211     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3212   }
3213 
3214   PetscFunctionReturn(0);
3215 }
3216 
3217 EXTERN_C_BEGIN
3218 #undef __FUNCT__
3219 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3220 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3221 {
3222   PetscInt       m,cstart, cend,j,nnz,i,d;
3223   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3224   const PetscInt *JJ;
3225   PetscScalar    *values;
3226   PetscErrorCode ierr;
3227 
3228   PetscFunctionBegin;
3229   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3230 
3231   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3232   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3233   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3234   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3235   m      = B->rmap->n;
3236   cstart = B->cmap->rstart;
3237   cend   = B->cmap->rend;
3238   rstart = B->rmap->rstart;
3239 
3240   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3241 
3242 #if defined(PETSC_USE_DEBUGGING)
3243   for (i=0; i<m; i++) {
3244     nnz     = Ii[i+1]- Ii[i];
3245     JJ      = J + Ii[i];
3246     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3247     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3248     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3249   }
3250 #endif
3251 
3252   for (i=0; i<m; i++) {
3253     nnz     = Ii[i+1]- Ii[i];
3254     JJ      = J + Ii[i];
3255     nnz_max = PetscMax(nnz_max,nnz);
3256     d       = 0;
3257     for (j=0; j<nnz; j++) {
3258       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3259     }
3260     d_nnz[i] = d;
3261     o_nnz[i] = nnz - d;
3262   }
3263   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3264   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3265 
3266   if (v) values = (PetscScalar*)v;
3267   else {
3268     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3269     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3270   }
3271 
3272   for (i=0; i<m; i++) {
3273     ii   = i + rstart;
3274     nnz  = Ii[i+1]- Ii[i];
3275     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3276   }
3277   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3278   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3279 
3280   if (!v) {
3281     ierr = PetscFree(values);CHKERRQ(ierr);
3282   }
3283   PetscFunctionReturn(0);
3284 }
3285 EXTERN_C_END
3286 
3287 #undef __FUNCT__
3288 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3289 /*@
3290    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3291    (the default parallel PETSc format).
3292 
3293    Collective on MPI_Comm
3294 
3295    Input Parameters:
3296 +  B - the matrix
3297 .  i - the indices into j for the start of each local row (starts with zero)
3298 .  j - the column indices for each local row (starts with zero)
3299 -  v - optional values in the matrix
3300 
3301    Level: developer
3302 
3303    Notes:
3304        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3305      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3306      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3307 
3308        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3309 
3310        The format which is used for the sparse matrix input, is equivalent to a
3311     row-major ordering.. i.e for the following matrix, the input data expected is
3312     as shown:
3313 
3314         1 0 0
3315         2 0 3     P0
3316        -------
3317         4 5 6     P1
3318 
3319      Process0 [P0]: rows_owned=[0,1]
3320         i =  {0,1,3}  [size = nrow+1  = 2+1]
3321         j =  {0,0,2}  [size = nz = 6]
3322         v =  {1,2,3}  [size = nz = 6]
3323 
3324      Process1 [P1]: rows_owned=[2]
3325         i =  {0,3}    [size = nrow+1  = 1+1]
3326         j =  {0,1,2}  [size = nz = 6]
3327         v =  {4,5,6}  [size = nz = 6]
3328 
3329 .keywords: matrix, aij, compressed row, sparse, parallel
3330 
3331 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3332           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3333 @*/
3334 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3335 {
3336   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3337 
3338   PetscFunctionBegin;
3339   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3340   if (f) {
3341     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3342   }
3343   PetscFunctionReturn(0);
3344 }
3345 
3346 #undef __FUNCT__
3347 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3348 /*@C
3349    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3350    (the default parallel PETSc format).  For good matrix assembly performance
3351    the user should preallocate the matrix storage by setting the parameters
3352    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3353    performance can be increased by more than a factor of 50.
3354 
3355    Collective on MPI_Comm
3356 
3357    Input Parameters:
3358 +  A - the matrix
3359 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3360            (same value is used for all local rows)
3361 .  d_nnz - array containing the number of nonzeros in the various rows of the
3362            DIAGONAL portion of the local submatrix (possibly different for each row)
3363            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3364            The size of this array is equal to the number of local rows, i.e 'm'.
3365            You must leave room for the diagonal entry even if it is zero.
3366 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3367            submatrix (same value is used for all local rows).
3368 -  o_nnz - array containing the number of nonzeros in the various rows of the
3369            OFF-DIAGONAL portion of the local submatrix (possibly different for
3370            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3371            structure. The size of this array is equal to the number
3372            of local rows, i.e 'm'.
3373 
3374    If the *_nnz parameter is given then the *_nz parameter is ignored
3375 
3376    The AIJ format (also called the Yale sparse matrix format or
3377    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3378    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3379 
3380    The parallel matrix is partitioned such that the first m0 rows belong to
3381    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3382    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3383 
3384    The DIAGONAL portion of the local submatrix of a processor can be defined
3385    as the submatrix which is obtained by extraction the part corresponding
3386    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3387    first row that belongs to the processor, and r2 is the last row belonging
3388    to the this processor. This is a square mxm matrix. The remaining portion
3389    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3390 
3391    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3392 
3393    You can call MatGetInfo() to get information on how effective the preallocation was;
3394    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3395    You can also run with the option -info and look for messages with the string
3396    malloc in them to see if additional memory allocation was needed.
3397 
3398    Example usage:
3399 
3400    Consider the following 8x8 matrix with 34 non-zero values, that is
3401    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3402    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3403    as follows:
3404 
3405 .vb
3406             1  2  0  |  0  3  0  |  0  4
3407     Proc0   0  5  6  |  7  0  0  |  8  0
3408             9  0 10  | 11  0  0  | 12  0
3409     -------------------------------------
3410            13  0 14  | 15 16 17  |  0  0
3411     Proc1   0 18  0  | 19 20 21  |  0  0
3412             0  0  0  | 22 23  0  | 24  0
3413     -------------------------------------
3414     Proc2  25 26 27  |  0  0 28  | 29  0
3415            30  0  0  | 31 32 33  |  0 34
3416 .ve
3417 
3418    This can be represented as a collection of submatrices as:
3419 
3420 .vb
3421       A B C
3422       D E F
3423       G H I
3424 .ve
3425 
3426    Where the submatrices A,B,C are owned by proc0, D,E,F are
3427    owned by proc1, G,H,I are owned by proc2.
3428 
3429    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3430    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3431    The 'M','N' parameters are 8,8, and have the same values on all procs.
3432 
3433    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3434    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3435    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3436    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3437    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3438    matrix, ans [DF] as another SeqAIJ matrix.
3439 
3440    When d_nz, o_nz parameters are specified, d_nz storage elements are
3441    allocated for every row of the local diagonal submatrix, and o_nz
3442    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3443    One way to choose d_nz and o_nz is to use the max nonzerors per local
3444    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3445    In this case, the values of d_nz,o_nz are:
3446 .vb
3447      proc0 : dnz = 2, o_nz = 2
3448      proc1 : dnz = 3, o_nz = 2
3449      proc2 : dnz = 1, o_nz = 4
3450 .ve
3451    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3452    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3453    for proc3. i.e we are using 12+15+10=37 storage locations to store
3454    34 values.
3455 
3456    When d_nnz, o_nnz parameters are specified, the storage is specified
3457    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3458    In the above case the values for d_nnz,o_nnz are:
3459 .vb
3460      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3461      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3462      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3463 .ve
3464    Here the space allocated is sum of all the above values i.e 34, and
3465    hence pre-allocation is perfect.
3466 
3467    Level: intermediate
3468 
3469 .keywords: matrix, aij, compressed row, sparse, parallel
3470 
3471 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3472           MPIAIJ, MatGetInfo()
3473 @*/
3474 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3475 {
3476   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3477 
3478   PetscFunctionBegin;
3479   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3480   if (f) {
3481     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3482   }
3483   PetscFunctionReturn(0);
3484 }
3485 
3486 #undef __FUNCT__
3487 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3488 /*@
3489      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3490          CSR format the local rows.
3491 
3492    Collective on MPI_Comm
3493 
3494    Input Parameters:
3495 +  comm - MPI communicator
3496 .  m - number of local rows (Cannot be PETSC_DECIDE)
3497 .  n - This value should be the same as the local size used in creating the
3498        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3499        calculated if N is given) For square matrices n is almost always m.
3500 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3501 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3502 .   i - row indices
3503 .   j - column indices
3504 -   a - matrix values
3505 
3506    Output Parameter:
3507 .   mat - the matrix
3508 
3509    Level: intermediate
3510 
3511    Notes:
3512        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3513      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3514      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3515 
3516        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3517 
3518        The format which is used for the sparse matrix input, is equivalent to a
3519     row-major ordering.. i.e for the following matrix, the input data expected is
3520     as shown:
3521 
3522         1 0 0
3523         2 0 3     P0
3524        -------
3525         4 5 6     P1
3526 
3527      Process0 [P0]: rows_owned=[0,1]
3528         i =  {0,1,3}  [size = nrow+1  = 2+1]
3529         j =  {0,0,2}  [size = nz = 6]
3530         v =  {1,2,3}  [size = nz = 6]
3531 
3532      Process1 [P1]: rows_owned=[2]
3533         i =  {0,3}    [size = nrow+1  = 1+1]
3534         j =  {0,1,2}  [size = nz = 6]
3535         v =  {4,5,6}  [size = nz = 6]
3536 
3537 .keywords: matrix, aij, compressed row, sparse, parallel
3538 
3539 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3540           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3541 @*/
3542 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3543 {
3544   PetscErrorCode ierr;
3545 
3546  PetscFunctionBegin;
3547   if (i[0]) {
3548     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3549   }
3550   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3551   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3552   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3553   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3554   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3555   PetscFunctionReturn(0);
3556 }
3557 
3558 #undef __FUNCT__
3559 #define __FUNCT__ "MatCreateMPIAIJ"
3560 /*@C
3561    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3562    (the default parallel PETSc format).  For good matrix assembly performance
3563    the user should preallocate the matrix storage by setting the parameters
3564    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3565    performance can be increased by more than a factor of 50.
3566 
3567    Collective on MPI_Comm
3568 
3569    Input Parameters:
3570 +  comm - MPI communicator
3571 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3572            This value should be the same as the local size used in creating the
3573            y vector for the matrix-vector product y = Ax.
3574 .  n - This value should be the same as the local size used in creating the
3575        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3576        calculated if N is given) For square matrices n is almost always m.
3577 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3578 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3579 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3580            (same value is used for all local rows)
3581 .  d_nnz - array containing the number of nonzeros in the various rows of the
3582            DIAGONAL portion of the local submatrix (possibly different for each row)
3583            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3584            The size of this array is equal to the number of local rows, i.e 'm'.
3585            You must leave room for the diagonal entry even if it is zero.
3586 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3587            submatrix (same value is used for all local rows).
3588 -  o_nnz - array containing the number of nonzeros in the various rows of the
3589            OFF-DIAGONAL portion of the local submatrix (possibly different for
3590            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3591            structure. The size of this array is equal to the number
3592            of local rows, i.e 'm'.
3593 
3594    Output Parameter:
3595 .  A - the matrix
3596 
3597    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3598    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3599    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3600 
3601    Notes:
3602    If the *_nnz parameter is given then the *_nz parameter is ignored
3603 
3604    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3605    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3606    storage requirements for this matrix.
3607 
3608    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3609    processor than it must be used on all processors that share the object for
3610    that argument.
3611 
3612    The user MUST specify either the local or global matrix dimensions
3613    (possibly both).
3614 
3615    The parallel matrix is partitioned across processors such that the
3616    first m0 rows belong to process 0, the next m1 rows belong to
3617    process 1, the next m2 rows belong to process 2 etc.. where
3618    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3619    values corresponding to [m x N] submatrix.
3620 
3621    The columns are logically partitioned with the n0 columns belonging
3622    to 0th partition, the next n1 columns belonging to the next
3623    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3624 
3625    The DIAGONAL portion of the local submatrix on any given processor
3626    is the submatrix corresponding to the rows and columns m,n
3627    corresponding to the given processor. i.e diagonal matrix on
3628    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3629    etc. The remaining portion of the local submatrix [m x (N-n)]
3630    constitute the OFF-DIAGONAL portion. The example below better
3631    illustrates this concept.
3632 
3633    For a square global matrix we define each processor's diagonal portion
3634    to be its local rows and the corresponding columns (a square submatrix);
3635    each processor's off-diagonal portion encompasses the remainder of the
3636    local matrix (a rectangular submatrix).
3637 
3638    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3639 
3640    When calling this routine with a single process communicator, a matrix of
3641    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3642    type of communicator, use the construction mechanism:
3643      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3644 
3645    By default, this format uses inodes (identical nodes) when possible.
3646    We search for consecutive rows with the same nonzero structure, thereby
3647    reusing matrix information to achieve increased efficiency.
3648 
3649    Options Database Keys:
3650 +  -mat_no_inode  - Do not use inodes
3651 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3652 -  -mat_aij_oneindex - Internally use indexing starting at 1
3653         rather than 0.  Note that when calling MatSetValues(),
3654         the user still MUST index entries starting at 0!
3655 
3656 
3657    Example usage:
3658 
3659    Consider the following 8x8 matrix with 34 non-zero values, that is
3660    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3661    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3662    as follows:
3663 
3664 .vb
3665             1  2  0  |  0  3  0  |  0  4
3666     Proc0   0  5  6  |  7  0  0  |  8  0
3667             9  0 10  | 11  0  0  | 12  0
3668     -------------------------------------
3669            13  0 14  | 15 16 17  |  0  0
3670     Proc1   0 18  0  | 19 20 21  |  0  0
3671             0  0  0  | 22 23  0  | 24  0
3672     -------------------------------------
3673     Proc2  25 26 27  |  0  0 28  | 29  0
3674            30  0  0  | 31 32 33  |  0 34
3675 .ve
3676 
3677    This can be represented as a collection of submatrices as:
3678 
3679 .vb
3680       A B C
3681       D E F
3682       G H I
3683 .ve
3684 
3685    Where the submatrices A,B,C are owned by proc0, D,E,F are
3686    owned by proc1, G,H,I are owned by proc2.
3687 
3688    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3689    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3690    The 'M','N' parameters are 8,8, and have the same values on all procs.
3691 
3692    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3693    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3694    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3695    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3696    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3697    matrix, ans [DF] as another SeqAIJ matrix.
3698 
3699    When d_nz, o_nz parameters are specified, d_nz storage elements are
3700    allocated for every row of the local diagonal submatrix, and o_nz
3701    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3702    One way to choose d_nz and o_nz is to use the max nonzerors per local
3703    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3704    In this case, the values of d_nz,o_nz are:
3705 .vb
3706      proc0 : dnz = 2, o_nz = 2
3707      proc1 : dnz = 3, o_nz = 2
3708      proc2 : dnz = 1, o_nz = 4
3709 .ve
3710    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3711    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3712    for proc3. i.e we are using 12+15+10=37 storage locations to store
3713    34 values.
3714 
3715    When d_nnz, o_nnz parameters are specified, the storage is specified
3716    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3717    In the above case the values for d_nnz,o_nnz are:
3718 .vb
3719      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3720      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3721      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3722 .ve
3723    Here the space allocated is sum of all the above values i.e 34, and
3724    hence pre-allocation is perfect.
3725 
3726    Level: intermediate
3727 
3728 .keywords: matrix, aij, compressed row, sparse, parallel
3729 
3730 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3731           MPIAIJ, MatCreateMPIAIJWithArrays()
3732 @*/
3733 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3734 {
3735   PetscErrorCode ierr;
3736   PetscMPIInt    size;
3737 
3738   PetscFunctionBegin;
3739   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3740   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3741   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3742   if (size > 1) {
3743     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3744     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3745   } else {
3746     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3747     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3748   }
3749   PetscFunctionReturn(0);
3750 }
3751 
3752 #undef __FUNCT__
3753 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3754 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3755 {
3756   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3757 
3758   PetscFunctionBegin;
3759   *Ad     = a->A;
3760   *Ao     = a->B;
3761   *colmap = a->garray;
3762   PetscFunctionReturn(0);
3763 }
3764 
3765 #undef __FUNCT__
3766 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3767 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3768 {
3769   PetscErrorCode ierr;
3770   PetscInt       i;
3771   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3772 
3773   PetscFunctionBegin;
3774   if (coloring->ctype == IS_COLORING_GLOBAL) {
3775     ISColoringValue *allcolors,*colors;
3776     ISColoring      ocoloring;
3777 
3778     /* set coloring for diagonal portion */
3779     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3780 
3781     /* set coloring for off-diagonal portion */
3782     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3783     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3784     for (i=0; i<a->B->cmap->n; i++) {
3785       colors[i] = allcolors[a->garray[i]];
3786     }
3787     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3788     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3789     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3790     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3791   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3792     ISColoringValue *colors;
3793     PetscInt        *larray;
3794     ISColoring      ocoloring;
3795 
3796     /* set coloring for diagonal portion */
3797     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3798     for (i=0; i<a->A->cmap->n; i++) {
3799       larray[i] = i + A->cmap->rstart;
3800     }
3801     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3802     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3803     for (i=0; i<a->A->cmap->n; i++) {
3804       colors[i] = coloring->colors[larray[i]];
3805     }
3806     ierr = PetscFree(larray);CHKERRQ(ierr);
3807     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3808     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3809     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3810 
3811     /* set coloring for off-diagonal portion */
3812     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3813     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3814     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3815     for (i=0; i<a->B->cmap->n; i++) {
3816       colors[i] = coloring->colors[larray[i]];
3817     }
3818     ierr = PetscFree(larray);CHKERRQ(ierr);
3819     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3820     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3821     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3822   } else {
3823     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3824   }
3825 
3826   PetscFunctionReturn(0);
3827 }
3828 
3829 #if defined(PETSC_HAVE_ADIC)
3830 #undef __FUNCT__
3831 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3832 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3833 {
3834   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3835   PetscErrorCode ierr;
3836 
3837   PetscFunctionBegin;
3838   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3839   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3840   PetscFunctionReturn(0);
3841 }
3842 #endif
3843 
3844 #undef __FUNCT__
3845 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3846 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3847 {
3848   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3849   PetscErrorCode ierr;
3850 
3851   PetscFunctionBegin;
3852   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3853   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3854   PetscFunctionReturn(0);
3855 }
3856 
3857 #undef __FUNCT__
3858 #define __FUNCT__ "MatMerge"
3859 /*@
3860       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3861                  matrices from each processor
3862 
3863     Collective on MPI_Comm
3864 
3865    Input Parameters:
3866 +    comm - the communicators the parallel matrix will live on
3867 .    inmat - the input sequential matrices
3868 .    n - number of local columns (or PETSC_DECIDE)
3869 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3870 
3871    Output Parameter:
3872 .    outmat - the parallel matrix generated
3873 
3874     Level: advanced
3875 
3876    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3877 
3878 @*/
3879 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3880 {
3881   PetscErrorCode ierr;
3882   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3883   PetscInt       *indx;
3884   PetscScalar    *values;
3885 
3886   PetscFunctionBegin;
3887   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3888   if (scall == MAT_INITIAL_MATRIX){
3889     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3890     if (n == PETSC_DECIDE){
3891       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3892     }
3893     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3894     rstart -= m;
3895 
3896     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3897     for (i=0;i<m;i++) {
3898       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3899       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3900       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3901     }
3902     /* This routine will ONLY return MPIAIJ type matrix */
3903     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3904     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3905     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3906     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3907     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3908 
3909   } else if (scall == MAT_REUSE_MATRIX){
3910     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3911   } else {
3912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3913   }
3914 
3915   for (i=0;i<m;i++) {
3916     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3917     Ii    = i + rstart;
3918     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3919     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3920   }
3921   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3922   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3923   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3924 
3925   PetscFunctionReturn(0);
3926 }
3927 
3928 #undef __FUNCT__
3929 #define __FUNCT__ "MatFileSplit"
3930 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3931 {
3932   PetscErrorCode    ierr;
3933   PetscMPIInt       rank;
3934   PetscInt          m,N,i,rstart,nnz;
3935   size_t            len;
3936   const PetscInt    *indx;
3937   PetscViewer       out;
3938   char              *name;
3939   Mat               B;
3940   const PetscScalar *values;
3941 
3942   PetscFunctionBegin;
3943   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3944   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3945   /* Should this be the type of the diagonal block of A? */
3946   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3947   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3948   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3949   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3950   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3951   for (i=0;i<m;i++) {
3952     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3953     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3954     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3955   }
3956   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3957   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3958 
3959   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3960   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3961   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3962   sprintf(name,"%s.%d",outfile,rank);
3963   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3964   ierr = PetscFree(name);
3965   ierr = MatView(B,out);CHKERRQ(ierr);
3966   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3967   ierr = MatDestroy(B);CHKERRQ(ierr);
3968   PetscFunctionReturn(0);
3969 }
3970 
3971 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3972 #undef __FUNCT__
3973 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3974 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3975 {
3976   PetscErrorCode       ierr;
3977   Mat_Merge_SeqsToMPI  *merge;
3978   PetscContainer       container;
3979 
3980   PetscFunctionBegin;
3981   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3982   if (container) {
3983     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3984     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3985     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3986     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3987     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3988     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3989     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3990     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3991     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3996     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
3997 
3998     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3999     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4000   }
4001   ierr = PetscFree(merge);CHKERRQ(ierr);
4002 
4003   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4004   PetscFunctionReturn(0);
4005 }
4006 
4007 #include "../src/mat/utils/freespace.h"
4008 #include "petscbt.h"
4009 
4010 #undef __FUNCT__
4011 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4012 /*@C
4013       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4014                  matrices from each processor
4015 
4016     Collective on MPI_Comm
4017 
4018    Input Parameters:
4019 +    comm - the communicators the parallel matrix will live on
4020 .    seqmat - the input sequential matrices
4021 .    m - number of local rows (or PETSC_DECIDE)
4022 .    n - number of local columns (or PETSC_DECIDE)
4023 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4024 
4025    Output Parameter:
4026 .    mpimat - the parallel matrix generated
4027 
4028     Level: advanced
4029 
4030    Notes:
4031      The dimensions of the sequential matrix in each processor MUST be the same.
4032      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4033      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4034 @*/
4035 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4036 {
4037   PetscErrorCode       ierr;
4038   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4039   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4040   PetscMPIInt          size,rank,taga,*len_s;
4041   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4042   PetscInt             proc,m;
4043   PetscInt             **buf_ri,**buf_rj;
4044   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4045   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4046   MPI_Request          *s_waits,*r_waits;
4047   MPI_Status           *status;
4048   MatScalar            *aa=a->a;
4049   MatScalar            **abuf_r,*ba_i;
4050   Mat_Merge_SeqsToMPI  *merge;
4051   PetscContainer       container;
4052 
4053   PetscFunctionBegin;
4054   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4055 
4056   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4057   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4058 
4059   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4060   if (container) {
4061     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4062   }
4063   bi     = merge->bi;
4064   bj     = merge->bj;
4065   buf_ri = merge->buf_ri;
4066   buf_rj = merge->buf_rj;
4067 
4068   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4069   owners = merge->rowmap->range;
4070   len_s  = merge->len_s;
4071 
4072   /* send and recv matrix values */
4073   /*-----------------------------*/
4074   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4075   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4076 
4077   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4078   for (proc=0,k=0; proc<size; proc++){
4079     if (!len_s[proc]) continue;
4080     i = owners[proc];
4081     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4082     k++;
4083   }
4084 
4085   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4086   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4087   ierr = PetscFree(status);CHKERRQ(ierr);
4088 
4089   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4090   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4091 
4092   /* insert mat values of mpimat */
4093   /*----------------------------*/
4094   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4095   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4096 
4097   for (k=0; k<merge->nrecv; k++){
4098     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4099     nrows = *(buf_ri_k[k]);
4100     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4101     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4102   }
4103 
4104   /* set values of ba */
4105   m = merge->rowmap->n;
4106   for (i=0; i<m; i++) {
4107     arow = owners[rank] + i;
4108     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4109     bnzi = bi[i+1] - bi[i];
4110     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4111 
4112     /* add local non-zero vals of this proc's seqmat into ba */
4113     anzi = ai[arow+1] - ai[arow];
4114     aj   = a->j + ai[arow];
4115     aa   = a->a + ai[arow];
4116     nextaj = 0;
4117     for (j=0; nextaj<anzi; j++){
4118       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4119         ba_i[j] += aa[nextaj++];
4120       }
4121     }
4122 
4123     /* add received vals into ba */
4124     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4125       /* i-th row */
4126       if (i == *nextrow[k]) {
4127         anzi = *(nextai[k]+1) - *nextai[k];
4128         aj   = buf_rj[k] + *(nextai[k]);
4129         aa   = abuf_r[k] + *(nextai[k]);
4130         nextaj = 0;
4131         for (j=0; nextaj<anzi; j++){
4132           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4133             ba_i[j] += aa[nextaj++];
4134           }
4135         }
4136         nextrow[k]++; nextai[k]++;
4137       }
4138     }
4139     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4140   }
4141   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4142   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4143 
4144   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4145   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4146   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4147   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4148   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4149   PetscFunctionReturn(0);
4150 }
4151 
4152 #undef __FUNCT__
4153 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4154 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4155 {
4156   PetscErrorCode       ierr;
4157   Mat                  B_mpi;
4158   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4159   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4160   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4161   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4162   PetscInt             len,proc,*dnz,*onz;
4163   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4164   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4165   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4166   MPI_Status           *status;
4167   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4168   PetscBT              lnkbt;
4169   Mat_Merge_SeqsToMPI  *merge;
4170   PetscContainer       container;
4171 
4172   PetscFunctionBegin;
4173   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4174 
4175   /* make sure it is a PETSc comm */
4176   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4177   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4178   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4179 
4180   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4181   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4182 
4183   /* determine row ownership */
4184   /*---------------------------------------------------------*/
4185   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4186   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4187   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4188   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4189   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4190   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4191   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4192 
4193   m      = merge->rowmap->n;
4194   M      = merge->rowmap->N;
4195   owners = merge->rowmap->range;
4196 
4197   /* determine the number of messages to send, their lengths */
4198   /*---------------------------------------------------------*/
4199   len_s  = merge->len_s;
4200 
4201   len = 0;  /* length of buf_si[] */
4202   merge->nsend = 0;
4203   for (proc=0; proc<size; proc++){
4204     len_si[proc] = 0;
4205     if (proc == rank){
4206       len_s[proc] = 0;
4207     } else {
4208       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4209       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4210     }
4211     if (len_s[proc]) {
4212       merge->nsend++;
4213       nrows = 0;
4214       for (i=owners[proc]; i<owners[proc+1]; i++){
4215         if (ai[i+1] > ai[i]) nrows++;
4216       }
4217       len_si[proc] = 2*(nrows+1);
4218       len += len_si[proc];
4219     }
4220   }
4221 
4222   /* determine the number and length of messages to receive for ij-structure */
4223   /*-------------------------------------------------------------------------*/
4224   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4225   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4226 
4227   /* post the Irecv of j-structure */
4228   /*-------------------------------*/
4229   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4230   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4231 
4232   /* post the Isend of j-structure */
4233   /*--------------------------------*/
4234   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4235 
4236   for (proc=0, k=0; proc<size; proc++){
4237     if (!len_s[proc]) continue;
4238     i = owners[proc];
4239     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4240     k++;
4241   }
4242 
4243   /* receives and sends of j-structure are complete */
4244   /*------------------------------------------------*/
4245   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4246   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4247 
4248   /* send and recv i-structure */
4249   /*---------------------------*/
4250   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4251   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4252 
4253   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4254   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4255   for (proc=0,k=0; proc<size; proc++){
4256     if (!len_s[proc]) continue;
4257     /* form outgoing message for i-structure:
4258          buf_si[0]:                 nrows to be sent
4259                [1:nrows]:           row index (global)
4260                [nrows+1:2*nrows+1]: i-structure index
4261     */
4262     /*-------------------------------------------*/
4263     nrows = len_si[proc]/2 - 1;
4264     buf_si_i    = buf_si + nrows+1;
4265     buf_si[0]   = nrows;
4266     buf_si_i[0] = 0;
4267     nrows = 0;
4268     for (i=owners[proc]; i<owners[proc+1]; i++){
4269       anzi = ai[i+1] - ai[i];
4270       if (anzi) {
4271         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4272         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4273         nrows++;
4274       }
4275     }
4276     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4277     k++;
4278     buf_si += len_si[proc];
4279   }
4280 
4281   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4282   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4283 
4284   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4285   for (i=0; i<merge->nrecv; i++){
4286     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4287   }
4288 
4289   ierr = PetscFree(len_si);CHKERRQ(ierr);
4290   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4291   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4292   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4293   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4294   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4295   ierr = PetscFree(status);CHKERRQ(ierr);
4296 
4297   /* compute a local seq matrix in each processor */
4298   /*----------------------------------------------*/
4299   /* allocate bi array and free space for accumulating nonzero column info */
4300   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4301   bi[0] = 0;
4302 
4303   /* create and initialize a linked list */
4304   nlnk = N+1;
4305   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4306 
4307   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4308   len = 0;
4309   len  = ai[owners[rank+1]] - ai[owners[rank]];
4310   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4311   current_space = free_space;
4312 
4313   /* determine symbolic info for each local row */
4314   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4315 
4316   for (k=0; k<merge->nrecv; k++){
4317     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4318     nrows = *buf_ri_k[k];
4319     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4320     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4321   }
4322 
4323   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4324   len = 0;
4325   for (i=0;i<m;i++) {
4326     bnzi   = 0;
4327     /* add local non-zero cols of this proc's seqmat into lnk */
4328     arow   = owners[rank] + i;
4329     anzi   = ai[arow+1] - ai[arow];
4330     aj     = a->j + ai[arow];
4331     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4332     bnzi += nlnk;
4333     /* add received col data into lnk */
4334     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4335       if (i == *nextrow[k]) { /* i-th row */
4336         anzi = *(nextai[k]+1) - *nextai[k];
4337         aj   = buf_rj[k] + *nextai[k];
4338         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4339         bnzi += nlnk;
4340         nextrow[k]++; nextai[k]++;
4341       }
4342     }
4343     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4344 
4345     /* if free space is not available, make more free space */
4346     if (current_space->local_remaining<bnzi) {
4347       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4348       nspacedouble++;
4349     }
4350     /* copy data into free space, then initialize lnk */
4351     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4352     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4353 
4354     current_space->array           += bnzi;
4355     current_space->local_used      += bnzi;
4356     current_space->local_remaining -= bnzi;
4357 
4358     bi[i+1] = bi[i] + bnzi;
4359   }
4360 
4361   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4362 
4363   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4364   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4365   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4366 
4367   /* create symbolic parallel matrix B_mpi */
4368   /*---------------------------------------*/
4369   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4370   if (n==PETSC_DECIDE) {
4371     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4372   } else {
4373     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4374   }
4375   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4376   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4377   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4378 
4379   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4380   B_mpi->assembled     = PETSC_FALSE;
4381   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4382   merge->bi            = bi;
4383   merge->bj            = bj;
4384   merge->buf_ri        = buf_ri;
4385   merge->buf_rj        = buf_rj;
4386   merge->coi           = PETSC_NULL;
4387   merge->coj           = PETSC_NULL;
4388   merge->owners_co     = PETSC_NULL;
4389 
4390   /* attach the supporting struct to B_mpi for reuse */
4391   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4392   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4393   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4394   *mpimat = B_mpi;
4395 
4396   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4397   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4398   PetscFunctionReturn(0);
4399 }
4400 
4401 #undef __FUNCT__
4402 #define __FUNCT__ "MatMerge_SeqsToMPI"
4403 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4404 {
4405   PetscErrorCode   ierr;
4406 
4407   PetscFunctionBegin;
4408   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4409   if (scall == MAT_INITIAL_MATRIX){
4410     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4411   }
4412   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4413   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4414   PetscFunctionReturn(0);
4415 }
4416 
4417 #undef __FUNCT__
4418 #define __FUNCT__ "MatGetLocalMat"
4419 /*@
4420      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4421 
4422     Not Collective
4423 
4424    Input Parameters:
4425 +    A - the matrix
4426 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4427 
4428    Output Parameter:
4429 .    A_loc - the local sequential matrix generated
4430 
4431     Level: developer
4432 
4433 @*/
4434 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4435 {
4436   PetscErrorCode  ierr;
4437   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4438   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4439   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4440   MatScalar       *aa=a->a,*ba=b->a,*cam;
4441   PetscScalar     *ca;
4442   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4443   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4444 
4445   PetscFunctionBegin;
4446   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4447   if (scall == MAT_INITIAL_MATRIX){
4448     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4449     ci[0] = 0;
4450     for (i=0; i<am; i++){
4451       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4452     }
4453     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4454     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4455     k = 0;
4456     for (i=0; i<am; i++) {
4457       ncols_o = bi[i+1] - bi[i];
4458       ncols_d = ai[i+1] - ai[i];
4459       /* off-diagonal portion of A */
4460       for (jo=0; jo<ncols_o; jo++) {
4461         col = cmap[*bj];
4462         if (col >= cstart) break;
4463         cj[k]   = col; bj++;
4464         ca[k++] = *ba++;
4465       }
4466       /* diagonal portion of A */
4467       for (j=0; j<ncols_d; j++) {
4468         cj[k]   = cstart + *aj++;
4469         ca[k++] = *aa++;
4470       }
4471       /* off-diagonal portion of A */
4472       for (j=jo; j<ncols_o; j++) {
4473         cj[k]   = cmap[*bj++];
4474         ca[k++] = *ba++;
4475       }
4476     }
4477     /* put together the new matrix */
4478     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4479     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4480     /* Since these are PETSc arrays, change flags to free them as necessary. */
4481     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4482     mat->free_a  = PETSC_TRUE;
4483     mat->free_ij = PETSC_TRUE;
4484     mat->nonew   = 0;
4485   } else if (scall == MAT_REUSE_MATRIX){
4486     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4487     ci = mat->i; cj = mat->j; cam = mat->a;
4488     for (i=0; i<am; i++) {
4489       /* off-diagonal portion of A */
4490       ncols_o = bi[i+1] - bi[i];
4491       for (jo=0; jo<ncols_o; jo++) {
4492         col = cmap[*bj];
4493         if (col >= cstart) break;
4494         *cam++ = *ba++; bj++;
4495       }
4496       /* diagonal portion of A */
4497       ncols_d = ai[i+1] - ai[i];
4498       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4499       /* off-diagonal portion of A */
4500       for (j=jo; j<ncols_o; j++) {
4501         *cam++ = *ba++; bj++;
4502       }
4503     }
4504   } else {
4505     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4506   }
4507 
4508   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4509   PetscFunctionReturn(0);
4510 }
4511 
4512 #undef __FUNCT__
4513 #define __FUNCT__ "MatGetLocalMatCondensed"
4514 /*@C
4515      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4516 
4517     Not Collective
4518 
4519    Input Parameters:
4520 +    A - the matrix
4521 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4522 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4523 
4524    Output Parameter:
4525 .    A_loc - the local sequential matrix generated
4526 
4527     Level: developer
4528 
4529 @*/
4530 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4531 {
4532   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4533   PetscErrorCode    ierr;
4534   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4535   IS                isrowa,iscola;
4536   Mat               *aloc;
4537 
4538   PetscFunctionBegin;
4539   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4540   if (!row){
4541     start = A->rmap->rstart; end = A->rmap->rend;
4542     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4543   } else {
4544     isrowa = *row;
4545   }
4546   if (!col){
4547     start = A->cmap->rstart;
4548     cmap  = a->garray;
4549     nzA   = a->A->cmap->n;
4550     nzB   = a->B->cmap->n;
4551     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4552     ncols = 0;
4553     for (i=0; i<nzB; i++) {
4554       if (cmap[i] < start) idx[ncols++] = cmap[i];
4555       else break;
4556     }
4557     imark = i;
4558     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4559     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4560     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4561     ierr = PetscFree(idx);CHKERRQ(ierr);
4562   } else {
4563     iscola = *col;
4564   }
4565   if (scall != MAT_INITIAL_MATRIX){
4566     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4567     aloc[0] = *A_loc;
4568   }
4569   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4570   *A_loc = aloc[0];
4571   ierr = PetscFree(aloc);CHKERRQ(ierr);
4572   if (!row){
4573     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4574   }
4575   if (!col){
4576     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4577   }
4578   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4579   PetscFunctionReturn(0);
4580 }
4581 
4582 #undef __FUNCT__
4583 #define __FUNCT__ "MatGetBrowsOfAcols"
4584 /*@C
4585     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4586 
4587     Collective on Mat
4588 
4589    Input Parameters:
4590 +    A,B - the matrices in mpiaij format
4591 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4592 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4593 
4594    Output Parameter:
4595 +    rowb, colb - index sets of rows and columns of B to extract
4596 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4597 -    B_seq - the sequential matrix generated
4598 
4599     Level: developer
4600 
4601 @*/
4602 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4603 {
4604   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4605   PetscErrorCode    ierr;
4606   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4607   IS                isrowb,iscolb;
4608   Mat               *bseq;
4609 
4610   PetscFunctionBegin;
4611   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4612     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4613   }
4614   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4615 
4616   if (scall == MAT_INITIAL_MATRIX){
4617     start = A->cmap->rstart;
4618     cmap  = a->garray;
4619     nzA   = a->A->cmap->n;
4620     nzB   = a->B->cmap->n;
4621     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4622     ncols = 0;
4623     for (i=0; i<nzB; i++) {  /* row < local row index */
4624       if (cmap[i] < start) idx[ncols++] = cmap[i];
4625       else break;
4626     }
4627     imark = i;
4628     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4629     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4630     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4631     ierr = PetscFree(idx);CHKERRQ(ierr);
4632     *brstart = imark;
4633     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4634   } else {
4635     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4636     isrowb = *rowb; iscolb = *colb;
4637     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4638     bseq[0] = *B_seq;
4639   }
4640   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4641   *B_seq = bseq[0];
4642   ierr = PetscFree(bseq);CHKERRQ(ierr);
4643   if (!rowb){
4644     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4645   } else {
4646     *rowb = isrowb;
4647   }
4648   if (!colb){
4649     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4650   } else {
4651     *colb = iscolb;
4652   }
4653   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4654   PetscFunctionReturn(0);
4655 }
4656 
4657 #undef __FUNCT__
4658 #define __FUNCT__ "MatGetBrowsOfAoCols"
4659 /*@C
4660     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4661     of the OFF-DIAGONAL portion of local A
4662 
4663     Collective on Mat
4664 
4665    Input Parameters:
4666 +    A,B - the matrices in mpiaij format
4667 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4668 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4669 .    startsj_r - similar to startsj for receives
4670 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4671 
4672    Output Parameter:
4673 +    B_oth - the sequential matrix generated
4674 
4675     Level: developer
4676 
4677 @*/
4678 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4679 {
4680   VecScatter_MPI_General *gen_to,*gen_from;
4681   PetscErrorCode         ierr;
4682   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4683   Mat_SeqAIJ             *b_oth;
4684   VecScatter             ctx=a->Mvctx;
4685   MPI_Comm               comm=((PetscObject)ctx)->comm;
4686   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4687   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4688   PetscScalar            *rvalues,*svalues;
4689   MatScalar              *b_otha,*bufa,*bufA;
4690   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4691   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4692   MPI_Status             *sstatus,rstatus;
4693   PetscMPIInt            jj;
4694   PetscInt               *cols,sbs,rbs;
4695   PetscScalar            *vals;
4696 
4697   PetscFunctionBegin;
4698   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4699     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4700   }
4701   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4702   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4703 
4704   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4705   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4706   rvalues  = gen_from->values; /* holds the length of receiving row */
4707   svalues  = gen_to->values;   /* holds the length of sending row */
4708   nrecvs   = gen_from->n;
4709   nsends   = gen_to->n;
4710 
4711   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4712   srow     = gen_to->indices;   /* local row index to be sent */
4713   sstarts  = gen_to->starts;
4714   sprocs   = gen_to->procs;
4715   sstatus  = gen_to->sstatus;
4716   sbs      = gen_to->bs;
4717   rstarts  = gen_from->starts;
4718   rprocs   = gen_from->procs;
4719   rbs      = gen_from->bs;
4720 
4721   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4722   if (scall == MAT_INITIAL_MATRIX){
4723     /* i-array */
4724     /*---------*/
4725     /*  post receives */
4726     for (i=0; i<nrecvs; i++){
4727       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4728       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4729       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4730     }
4731 
4732     /* pack the outgoing message */
4733     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4734     sstartsj[0] = 0;  rstartsj[0] = 0;
4735     len = 0; /* total length of j or a array to be sent */
4736     k = 0;
4737     for (i=0; i<nsends; i++){
4738       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4739       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4740       for (j=0; j<nrows; j++) {
4741         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4742         for (l=0; l<sbs; l++){
4743           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4744           rowlen[j*sbs+l] = ncols;
4745           len += ncols;
4746           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4747         }
4748         k++;
4749       }
4750       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4751       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4752     }
4753     /* recvs and sends of i-array are completed */
4754     i = nrecvs;
4755     while (i--) {
4756       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4757     }
4758     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4759 
4760     /* allocate buffers for sending j and a arrays */
4761     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4762     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4763 
4764     /* create i-array of B_oth */
4765     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4766     b_othi[0] = 0;
4767     len = 0; /* total length of j or a array to be received */
4768     k = 0;
4769     for (i=0; i<nrecvs; i++){
4770       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4771       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4772       for (j=0; j<nrows; j++) {
4773         b_othi[k+1] = b_othi[k] + rowlen[j];
4774         len += rowlen[j]; k++;
4775       }
4776       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4777     }
4778 
4779     /* allocate space for j and a arrrays of B_oth */
4780     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4781     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4782 
4783     /* j-array */
4784     /*---------*/
4785     /*  post receives of j-array */
4786     for (i=0; i<nrecvs; i++){
4787       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4788       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4789     }
4790 
4791     /* pack the outgoing message j-array */
4792     k = 0;
4793     for (i=0; i<nsends; i++){
4794       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4795       bufJ = bufj+sstartsj[i];
4796       for (j=0; j<nrows; j++) {
4797         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4798         for (ll=0; ll<sbs; ll++){
4799           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4800           for (l=0; l<ncols; l++){
4801             *bufJ++ = cols[l];
4802           }
4803           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4804         }
4805       }
4806       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4807     }
4808 
4809     /* recvs and sends of j-array are completed */
4810     i = nrecvs;
4811     while (i--) {
4812       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4813     }
4814     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4815   } else if (scall == MAT_REUSE_MATRIX){
4816     sstartsj = *startsj;
4817     rstartsj = *startsj_r;
4818     bufa     = *bufa_ptr;
4819     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4820     b_otha   = b_oth->a;
4821   } else {
4822     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4823   }
4824 
4825   /* a-array */
4826   /*---------*/
4827   /*  post receives of a-array */
4828   for (i=0; i<nrecvs; i++){
4829     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4830     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4831   }
4832 
4833   /* pack the outgoing message a-array */
4834   k = 0;
4835   for (i=0; i<nsends; i++){
4836     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4837     bufA = bufa+sstartsj[i];
4838     for (j=0; j<nrows; j++) {
4839       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4840       for (ll=0; ll<sbs; ll++){
4841         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4842         for (l=0; l<ncols; l++){
4843           *bufA++ = vals[l];
4844         }
4845         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4846       }
4847     }
4848     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4849   }
4850   /* recvs and sends of a-array are completed */
4851   i = nrecvs;
4852   while (i--) {
4853     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4854   }
4855   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4856   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4857 
4858   if (scall == MAT_INITIAL_MATRIX){
4859     /* put together the new matrix */
4860     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4861 
4862     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4863     /* Since these are PETSc arrays, change flags to free them as necessary. */
4864     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4865     b_oth->free_a  = PETSC_TRUE;
4866     b_oth->free_ij = PETSC_TRUE;
4867     b_oth->nonew   = 0;
4868 
4869     ierr = PetscFree(bufj);CHKERRQ(ierr);
4870     if (!startsj || !bufa_ptr){
4871       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4872       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4873     } else {
4874       *startsj   = sstartsj;
4875       *startsj_r = rstartsj;
4876       *bufa_ptr  = bufa;
4877     }
4878   }
4879   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4880   PetscFunctionReturn(0);
4881 }
4882 
4883 #undef __FUNCT__
4884 #define __FUNCT__ "MatGetCommunicationStructs"
4885 /*@C
4886   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4887 
4888   Not Collective
4889 
4890   Input Parameters:
4891 . A - The matrix in mpiaij format
4892 
4893   Output Parameter:
4894 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4895 . colmap - A map from global column index to local index into lvec
4896 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4897 
4898   Level: developer
4899 
4900 @*/
4901 #if defined (PETSC_USE_CTABLE)
4902 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4903 #else
4904 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4905 #endif
4906 {
4907   Mat_MPIAIJ *a;
4908 
4909   PetscFunctionBegin;
4910   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4911   PetscValidPointer(lvec, 2)
4912   PetscValidPointer(colmap, 3)
4913   PetscValidPointer(multScatter, 4)
4914   a = (Mat_MPIAIJ *) A->data;
4915   if (lvec) *lvec = a->lvec;
4916   if (colmap) *colmap = a->colmap;
4917   if (multScatter) *multScatter = a->Mvctx;
4918   PetscFunctionReturn(0);
4919 }
4920 
4921 EXTERN_C_BEGIN
4922 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4923 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4924 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4925 EXTERN_C_END
4926 
4927 #undef __FUNCT__
4928 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4929 /*
4930     Computes (B'*A')' since computing B*A directly is untenable
4931 
4932                n                       p                          p
4933         (              )       (              )         (                  )
4934       m (      A       )  *  n (       B      )   =   m (         C        )
4935         (              )       (              )         (                  )
4936 
4937 */
4938 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4939 {
4940   PetscErrorCode     ierr;
4941   Mat                At,Bt,Ct;
4942 
4943   PetscFunctionBegin;
4944   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4945   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4946   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4947   ierr = MatDestroy(At);CHKERRQ(ierr);
4948   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4949   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4950   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4951   PetscFunctionReturn(0);
4952 }
4953 
4954 #undef __FUNCT__
4955 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4956 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4957 {
4958   PetscErrorCode ierr;
4959   PetscInt       m=A->rmap->n,n=B->cmap->n;
4960   Mat            Cmat;
4961 
4962   PetscFunctionBegin;
4963   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4964   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4965   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4966   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4967   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4968   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4969   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4970   *C   = Cmat;
4971   PetscFunctionReturn(0);
4972 }
4973 
4974 /* ----------------------------------------------------------------*/
4975 #undef __FUNCT__
4976 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4977 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4978 {
4979   PetscErrorCode ierr;
4980 
4981   PetscFunctionBegin;
4982   if (scall == MAT_INITIAL_MATRIX){
4983     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4984   }
4985   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4986   PetscFunctionReturn(0);
4987 }
4988 
4989 EXTERN_C_BEGIN
4990 #if defined(PETSC_HAVE_MUMPS)
4991 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4992 #endif
4993 #if defined(PETSC_HAVE_PASTIX)
4994 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4995 #endif
4996 #if defined(PETSC_HAVE_SUPERLU_DIST)
4997 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4998 #endif
4999 #if defined(PETSC_HAVE_SPOOLES)
5000 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5001 #endif
5002 EXTERN_C_END
5003 
5004 /*MC
5005    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5006 
5007    Options Database Keys:
5008 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5009 
5010   Level: beginner
5011 
5012 .seealso: MatCreateMPIAIJ()
5013 M*/
5014 
5015 EXTERN_C_BEGIN
5016 #undef __FUNCT__
5017 #define __FUNCT__ "MatCreate_MPIAIJ"
5018 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5019 {
5020   Mat_MPIAIJ     *b;
5021   PetscErrorCode ierr;
5022   PetscMPIInt    size;
5023 
5024   PetscFunctionBegin;
5025   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5026 
5027   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5028   B->data         = (void*)b;
5029   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5030   B->rmap->bs     = 1;
5031   B->assembled    = PETSC_FALSE;
5032   B->mapping      = 0;
5033 
5034   B->insertmode   = NOT_SET_VALUES;
5035   b->size         = size;
5036   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5037 
5038   /* build cache for off array entries formed */
5039   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5040   b->donotstash  = PETSC_FALSE;
5041   b->colmap      = 0;
5042   b->garray      = 0;
5043   b->roworiented = PETSC_TRUE;
5044 
5045   /* stuff used for matrix vector multiply */
5046   b->lvec      = PETSC_NULL;
5047   b->Mvctx     = PETSC_NULL;
5048 
5049   /* stuff for MatGetRow() */
5050   b->rowindices   = 0;
5051   b->rowvalues    = 0;
5052   b->getrowactive = PETSC_FALSE;
5053 
5054 #if defined(PETSC_HAVE_SPOOLES)
5055   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5056                                      "MatGetFactor_mpiaij_spooles",
5057                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5058 #endif
5059 #if defined(PETSC_HAVE_MUMPS)
5060   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5061                                      "MatGetFactor_mpiaij_mumps",
5062                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5063 #endif
5064 #if defined(PETSC_HAVE_PASTIX)
5065   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5066 					   "MatGetFactor_mpiaij_pastix",
5067 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5068 #endif
5069 #if defined(PETSC_HAVE_SUPERLU_DIST)
5070   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5071                                      "MatGetFactor_mpiaij_superlu_dist",
5072                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5073 #endif
5074   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5075                                      "MatStoreValues_MPIAIJ",
5076                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5077   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5078                                      "MatRetrieveValues_MPIAIJ",
5079                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5080   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5081 				     "MatGetDiagonalBlock_MPIAIJ",
5082                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5083   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5084 				     "MatIsTranspose_MPIAIJ",
5085 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5086   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5087 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5088 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5089   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5090 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5091 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5092   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5093 				     "MatDiagonalScaleLocal_MPIAIJ",
5094 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5095   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5096                                      "MatConvert_MPIAIJ_MPICSRPERM",
5097                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5098   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5099                                      "MatConvert_MPIAIJ_MPICRL",
5100                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5101   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5102                                      "MatConvert_MPIAIJ_MPISBAIJ",
5103                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5104   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5105                                      "MatMatMult_MPIDense_MPIAIJ",
5106                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5107   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5108                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5109                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5110   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5111                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5112                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5113   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5114   PetscFunctionReturn(0);
5115 }
5116 EXTERN_C_END
5117 
5118 #undef __FUNCT__
5119 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5120 /*@
5121      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5122          and "off-diagonal" part of the matrix in CSR format.
5123 
5124    Collective on MPI_Comm
5125 
5126    Input Parameters:
5127 +  comm - MPI communicator
5128 .  m - number of local rows (Cannot be PETSC_DECIDE)
5129 .  n - This value should be the same as the local size used in creating the
5130        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5131        calculated if N is given) For square matrices n is almost always m.
5132 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5133 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5134 .   i - row indices for "diagonal" portion of matrix
5135 .   j - column indices
5136 .   a - matrix values
5137 .   oi - row indices for "off-diagonal" portion of matrix
5138 .   oj - column indices
5139 -   oa - matrix values
5140 
5141    Output Parameter:
5142 .   mat - the matrix
5143 
5144    Level: advanced
5145 
5146    Notes:
5147        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5148 
5149        The i and j indices are 0 based
5150 
5151        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5152 
5153        This sets local rows and cannot be used to set off-processor values.
5154 
5155        You cannot later use MatSetValues() to change values in this matrix.
5156 
5157 .keywords: matrix, aij, compressed row, sparse, parallel
5158 
5159 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5160           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5161 @*/
5162 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5163 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5164 {
5165   PetscErrorCode ierr;
5166   Mat_MPIAIJ     *maij;
5167 
5168  PetscFunctionBegin;
5169   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5170   if (i[0]) {
5171     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5172   }
5173   if (oi[0]) {
5174     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5175   }
5176   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5177   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5178   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5179   maij = (Mat_MPIAIJ*) (*mat)->data;
5180   maij->donotstash     = PETSC_TRUE;
5181   (*mat)->preallocated = PETSC_TRUE;
5182 
5183   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5184   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5185   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5186   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5187 
5188   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5189   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5190 
5191   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5192   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5193   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5194   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5195 
5196   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5197   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5198   PetscFunctionReturn(0);
5199 }
5200 
5201 /*
5202     Special version for direct calls from Fortran
5203 */
5204 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5205 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5206 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5207 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5208 #endif
5209 
5210 /* Change these macros so can be used in void function */
5211 #undef CHKERRQ
5212 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5213 #undef SETERRQ2
5214 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5215 #undef SETERRQ
5216 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5217 
5218 EXTERN_C_BEGIN
5219 #undef __FUNCT__
5220 #define __FUNCT__ "matsetvaluesmpiaij_"
5221 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5222 {
5223   Mat             mat = *mmat;
5224   PetscInt        m = *mm, n = *mn;
5225   InsertMode      addv = *maddv;
5226   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5227   PetscScalar     value;
5228   PetscErrorCode  ierr;
5229 
5230   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5231   if (mat->insertmode == NOT_SET_VALUES) {
5232     mat->insertmode = addv;
5233   }
5234 #if defined(PETSC_USE_DEBUG)
5235   else if (mat->insertmode != addv) {
5236     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5237   }
5238 #endif
5239   {
5240   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5241   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5242   PetscTruth      roworiented = aij->roworiented;
5243 
5244   /* Some Variables required in the macro */
5245   Mat             A = aij->A;
5246   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5247   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5248   MatScalar       *aa = a->a;
5249   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5250   Mat             B = aij->B;
5251   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5252   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5253   MatScalar       *ba = b->a;
5254 
5255   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5256   PetscInt        nonew = a->nonew;
5257   MatScalar       *ap1,*ap2;
5258 
5259   PetscFunctionBegin;
5260   for (i=0; i<m; i++) {
5261     if (im[i] < 0) continue;
5262 #if defined(PETSC_USE_DEBUG)
5263     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5264 #endif
5265     if (im[i] >= rstart && im[i] < rend) {
5266       row      = im[i] - rstart;
5267       lastcol1 = -1;
5268       rp1      = aj + ai[row];
5269       ap1      = aa + ai[row];
5270       rmax1    = aimax[row];
5271       nrow1    = ailen[row];
5272       low1     = 0;
5273       high1    = nrow1;
5274       lastcol2 = -1;
5275       rp2      = bj + bi[row];
5276       ap2      = ba + bi[row];
5277       rmax2    = bimax[row];
5278       nrow2    = bilen[row];
5279       low2     = 0;
5280       high2    = nrow2;
5281 
5282       for (j=0; j<n; j++) {
5283         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5284         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5285         if (in[j] >= cstart && in[j] < cend){
5286           col = in[j] - cstart;
5287           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5288         } else if (in[j] < 0) continue;
5289 #if defined(PETSC_USE_DEBUG)
5290         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5291 #endif
5292         else {
5293           if (mat->was_assembled) {
5294             if (!aij->colmap) {
5295               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5296             }
5297 #if defined (PETSC_USE_CTABLE)
5298             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5299 	    col--;
5300 #else
5301             col = aij->colmap[in[j]] - 1;
5302 #endif
5303             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5304               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5305               col =  in[j];
5306               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5307               B = aij->B;
5308               b = (Mat_SeqAIJ*)B->data;
5309               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5310               rp2      = bj + bi[row];
5311               ap2      = ba + bi[row];
5312               rmax2    = bimax[row];
5313               nrow2    = bilen[row];
5314               low2     = 0;
5315               high2    = nrow2;
5316               bm       = aij->B->rmap->n;
5317               ba = b->a;
5318             }
5319           } else col = in[j];
5320           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5321         }
5322       }
5323     } else {
5324       if (!aij->donotstash) {
5325         if (roworiented) {
5326           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5327         } else {
5328           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5329         }
5330       }
5331     }
5332   }}
5333   PetscFunctionReturnVoid();
5334 }
5335 EXTERN_C_END
5336 
5337