xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1d79065fde699397c4a1eb4514b06e24fdc073d8)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatDistribute_MPIAIJ"
7 /*
8     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
9     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
10 
11     Only for square matrices
12 */
13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
14 {
15   PetscMPIInt    rank,size;
16   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
17   PetscErrorCode ierr;
18   Mat            mat;
19   Mat_SeqAIJ     *gmata;
20   PetscMPIInt    tag;
21   MPI_Status     status;
22   PetscTruth     aij;
23   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
24 
25   PetscFunctionBegin;
26   CHKMEMQ;
27   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
28   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
29   if (!rank) {
30     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
31     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
32   }
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
35     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
36     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
37     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
38     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
39     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
40     rowners[0] = 0;
41     for (i=2; i<=size; i++) {
42       rowners[i] += rowners[i-1];
43     }
44     rstart = rowners[rank];
45     rend   = rowners[rank+1];
46     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
47     if (!rank) {
48       gmata = (Mat_SeqAIJ*) gmat->data;
49       /* send row lengths to all processors */
50       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
51       for (i=1; i<size; i++) {
52 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
53       }
54       /* determine number diagonal and off-diagonal counts */
55       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
56       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
57       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
58       jj = 0;
59       for (i=0; i<m; i++) {
60 	for (j=0; j<dlens[i]; j++) {
61           if (gmata->j[jj] < rstart) ld[i]++;
62 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
63 	  jj++;
64 	}
65       }
66       /* send column indices to other processes */
67       for (i=1; i<size; i++) {
68 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
69 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
70 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71       }
72 
73       /* send numerical values to other processes */
74       for (i=1; i<size; i++) {
75         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
76         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
77       }
78       gmataa = gmata->a;
79       gmataj = gmata->j;
80 
81     } else {
82       /* receive row lengths */
83       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
84       /* receive column indices */
85       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
86       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
87       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
88       /* determine number diagonal and off-diagonal counts */
89       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
90       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
91       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
92       jj = 0;
93       for (i=0; i<m; i++) {
94 	for (j=0; j<dlens[i]; j++) {
95           if (gmataj[jj] < rstart) ld[i]++;
96 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
97 	  jj++;
98 	}
99       }
100       /* receive numerical values */
101       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
103     }
104     /* set preallocation */
105     for (i=0; i<m; i++) {
106       dlens[i] -= olens[i];
107     }
108     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
109     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
110 
111     for (i=0; i<m; i++) {
112       dlens[i] += olens[i];
113     }
114     cnt  = 0;
115     for (i=0; i<m; i++) {
116       row  = rstart + i;
117       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
118       cnt += dlens[i];
119     }
120     if (rank) {
121       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
122     }
123     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
124     ierr = PetscFree(rowners);CHKERRQ(ierr);
125     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
126     *inmat = mat;
127   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
128     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
129     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
130     mat   = *inmat;
131     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
132     if (!rank) {
133       /* send numerical values to other processes */
134       gmata = (Mat_SeqAIJ*) gmat->data;
135       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
136       gmataa = gmata->a;
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
142     } else {
143       /* receive numerical values from process 0*/
144       nz   = Ad->nz + Ao->nz;
145       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
146       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
147     }
148     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
149     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
150     ad = Ad->a;
151     ao = Ao->a;
152     if (mat->rmap->n) {
153       i  = 0;
154       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
155       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
156     }
157     for (i=1; i<mat->rmap->n; i++) {
158       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
159       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
160     }
161     i--;
162     if (mat->rmap->n) {
163       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
164     }
165     if (rank) {
166       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
167     }
168   }
169   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   CHKMEMQ;
172   PetscFunctionReturn(0);
173 }
174 
175 /*
176   Local utility routine that creates a mapping from the global column
177 number to the local number in the off-diagonal part of the local
178 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
179 a slightly higher hash table cost; without it it is not scalable (each processor
180 has an order N integer array but is fast to acess.
181 */
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
204 
205 
206 #define CHUNKSIZE   15
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
242 { \
243     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
244     lastcol2 = col;\
245     while (high2-low2 > 5) { \
246       t = (low2+high2)/2; \
247       if (rp2[t] > col) high2 = t; \
248       else             low2  = t; \
249     } \
250     for (_i=low2; _i<high2; _i++) {		\
251       if (rp2[_i] > col) break;			\
252       if (rp2[_i] == col) {			      \
253 	if (addv == ADD_VALUES) ap2[_i] += value;     \
254 	else                    ap2[_i] = value;      \
255 	goto b_noinsert;			      \
256       }						      \
257     }							      \
258     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
259     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
260     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
261     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
262     N = nrow2++ - 1; b->nz++; high2++;					\
263     /* shift up all the later entries in this row */			\
264     for (ii=N; ii>=_i; ii--) {						\
265       rp2[ii+1] = rp2[ii];						\
266       ap2[ii+1] = ap2[ii];						\
267     }									\
268     rp2[_i] = col;							\
269     ap2[_i] = value;							\
270     b_noinsert: ;								\
271     bilen[row] = nrow2;							\
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
285 
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIAIJ"
304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscScalar    value;
308   PetscErrorCode ierr;
309   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
310   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
311   PetscTruth     roworiented = aij->roworiented;
312 
313   /* Some Variables required in the macro */
314   Mat            A = aij->A;
315   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
316   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
317   MatScalar      *aa = a->a;
318   PetscTruth     ignorezeroentries = a->ignorezeroentries;
319   Mat            B = aij->B;
320   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
321   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
322   MatScalar      *ba = b->a;
323 
324   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
325   PetscInt       nonew = a->nonew;
326   MatScalar      *ap1,*ap2;
327 
328   PetscFunctionBegin;
329   if (v) PetscValidScalarPointer(v,6);
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are either in INSERTMODE or ADDMODE */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled, if so we must
517      also disassemble ourselfs, in order that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
544   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
550 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
551 {
552   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
557   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatZeroRows_MPIAIJ"
563 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
564 {
565   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
568   PetscInt       i,*owners = A->rmap->range;
569   PetscInt       *nprocs,j,idx,nsends,row;
570   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
571   PetscInt       *rvalues,count,base,slen,*source;
572   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
573   MPI_Comm       comm = ((PetscObject)A)->comm;
574   MPI_Request    *send_waits,*recv_waits;
575   MPI_Status     recv_status,*send_status;
576 #if defined(PETSC_DEBUG)
577   PetscTruth     found = PETSC_FALSE;
578 #endif
579 
580   PetscFunctionBegin;
581   /*  first count number of contributors to each processor */
582   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
583   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
584   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
585   j = 0;
586   for (i=0; i<N; i++) {
587     if (lastidx > (idx = rows[i])) j = 0;
588     lastidx = idx;
589     for (; j<size; j++) {
590       if (idx >= owners[j] && idx < owners[j+1]) {
591         nprocs[2*j]++;
592         nprocs[2*j+1] = 1;
593         owner[i] = j;
594 #if defined(PETSC_DEBUG)
595         found = PETSC_TRUE;
596 #endif
597         break;
598       }
599     }
600 #if defined(PETSC_DEBUG)
601     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
602     found = PETSC_FALSE;
603 #endif
604   }
605   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
606 
607   /* inform other processors of number of messages and max length*/
608   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
609 
610   /* post receives:   */
611   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
612   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
613   for (i=0; i<nrecvs; i++) {
614     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
615   }
616 
617   /* do sends:
618       1) starts[i] gives the starting index in svalues for stuff going to
619          the ith processor
620   */
621   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
622   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
623   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
624   starts[0] = 0;
625   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
626   for (i=0; i<N; i++) {
627     svalues[starts[owner[i]]++] = rows[i];
628   }
629 
630   starts[0] = 0;
631   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
632   count = 0;
633   for (i=0; i<size; i++) {
634     if (nprocs[2*i+1]) {
635       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
636     }
637   }
638   ierr = PetscFree(starts);CHKERRQ(ierr);
639 
640   base = owners[rank];
641 
642   /*  wait on receives */
643   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
644   count  = nrecvs; slen = 0;
645   while (count) {
646     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
647     /* unpack receives into our local space */
648     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
649     source[imdex]  = recv_status.MPI_SOURCE;
650     lens[imdex]    = n;
651     slen          += n;
652     count--;
653   }
654   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
655 
656   /* move the data into the send scatter */
657   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
658   count = 0;
659   for (i=0; i<nrecvs; i++) {
660     values = rvalues + i*nmax;
661     for (j=0; j<lens[i]; j++) {
662       lrows[count++] = values[j] - base;
663     }
664   }
665   ierr = PetscFree(rvalues);CHKERRQ(ierr);
666   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
667   ierr = PetscFree(owner);CHKERRQ(ierr);
668   ierr = PetscFree(nprocs);CHKERRQ(ierr);
669 
670   /* actually zap the local rows */
671   /*
672         Zero the required rows. If the "diagonal block" of the matrix
673      is square and the user wishes to set the diagonal we use separate
674      code so that MatSetValues() is not called for each diagonal allocating
675      new memory, thus calling lots of mallocs and slowing things down.
676 
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMultAdd_MPIAIJ"
745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
746 {
747   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
752   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
753   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
761 {
762   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
763   PetscErrorCode ierr;
764   PetscTruth     merged;
765 
766   PetscFunctionBegin;
767   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
768   /* do nondiagonal part */
769   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
770   if (!merged) {
771     /* send it on its way */
772     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773     /* do local part */
774     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
775     /* receive remote parts: note this assumes the values are not actually */
776     /* added in yy until the next line, */
777     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778   } else {
779     /* do local part */
780     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
781     /* send it on its way */
782     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
783     /* values actually were received in the Begin() but we need to call this nop */
784     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 EXTERN_C_BEGIN
790 #undef __FUNCT__
791 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
793 {
794   MPI_Comm       comm;
795   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
796   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
797   IS             Me,Notme;
798   PetscErrorCode ierr;
799   PetscInt       M,N,first,last,*notme,i;
800   PetscMPIInt    size;
801 
802   PetscFunctionBegin;
803 
804   /* Easy test: symmetric diagonal block */
805   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
806   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
807   if (!*f) PetscFunctionReturn(0);
808   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
809   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
810   if (size == 1) PetscFunctionReturn(0);
811 
812   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
813   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
814   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
815   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
816   for (i=0; i<first; i++) notme[i] = i;
817   for (i=last; i<M; i++) notme[i-last+first] = i;
818   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
819   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
820   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
821   Aoff = Aoffs[0];
822   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
823   Boff = Boffs[0];
824   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
825   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
827   ierr = ISDestroy(Me);CHKERRQ(ierr);
828   ierr = ISDestroy(Notme);CHKERRQ(ierr);
829 
830   PetscFunctionReturn(0);
831 }
832 EXTERN_C_END
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
837 {
838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* do nondiagonal part */
843   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
844   /* send it on its way */
845   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
846   /* do local part */
847   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
848   /* receive remote parts */
849   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 /*
854   This only works correctly for square matrices where the subblock A->A is the
855    diagonal block
856 */
857 #undef __FUNCT__
858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
860 {
861   PetscErrorCode ierr;
862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
863 
864   PetscFunctionBegin;
865   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
866   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
868   }
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
900   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatView_MPIAIJ_Binary"
927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
928 {
929   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
930   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
931   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
932   PetscErrorCode    ierr;
933   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
934   int               fd;
935   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
936   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
937   PetscScalar       *column_values;
938 
939   PetscFunctionBegin;
940   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
941   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
942   nz   = A->nz + B->nz;
943   if (!rank) {
944     header[0] = MAT_FILE_COOKIE;
945     header[1] = mat->rmap->N;
946     header[2] = mat->cmap->N;
947     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
948     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
949     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
950     /* get largest number of rows any processor has */
951     rlen = mat->rmap->n;
952     range = mat->rmap->range;
953     for (i=1; i<size; i++) {
954       rlen = PetscMax(rlen,range[i+1] - range[i]);
955     }
956   } else {
957     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
958     rlen = mat->rmap->n;
959   }
960 
961   /* load up the local row counts */
962   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
963   for (i=0; i<mat->rmap->n; i++) {
964     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
965   }
966 
967   /* store the row lengths to the file */
968   if (!rank) {
969     MPI_Status status;
970     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
971     for (i=1; i<size; i++) {
972       rlen = range[i+1] - range[i];
973       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
974       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
975     }
976   } else {
977     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
980 
981   /* load up the local column indices */
982   nzmax = nz; /* )th processor needs space a largest processor needs */
983   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
984   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
985   cnt  = 0;
986   for (i=0; i<mat->rmap->n; i++) {
987     for (j=B->i[i]; j<B->i[i+1]; j++) {
988       if ( (col = garray[B->j[j]]) > cstart) break;
989       column_indices[cnt++] = col;
990     }
991     for (k=A->i[i]; k<A->i[i+1]; k++) {
992       column_indices[cnt++] = A->j[k] + cstart;
993     }
994     for (; j<B->i[i+1]; j++) {
995       column_indices[cnt++] = garray[B->j[j]];
996     }
997   }
998   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
999 
1000   /* store the column indices to the file */
1001   if (!rank) {
1002     MPI_Status status;
1003     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1004     for (i=1; i<size; i++) {
1005       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1006       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1007       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1008       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1009     }
1010   } else {
1011     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the local column values */
1017   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<mat->rmap->n; i++) {
1020     for (j=B->i[i]; j<B->i[i+1]; j++) {
1021       if ( garray[B->j[j]] > cstart) break;
1022       column_values[cnt++] = B->a[j];
1023     }
1024     for (k=A->i[i]; k<A->i[i+1]; k++) {
1025       column_values[cnt++] = A->a[k];
1026     }
1027     for (; j<B->i[i+1]; j++) {
1028       column_values[cnt++] = B->a[j];
1029     }
1030   }
1031   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1032 
1033   /* store the column values to the file */
1034   if (!rank) {
1035     MPI_Status status;
1036     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1037     for (i=1; i<size; i++) {
1038       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1039       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1040       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1041       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1042     }
1043   } else {
1044     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046   }
1047   ierr = PetscFree(column_values);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1054 {
1055   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1056   PetscErrorCode    ierr;
1057   PetscMPIInt       rank = aij->rank,size = aij->size;
1058   PetscTruth        isdraw,iascii,isbinary;
1059   PetscViewer       sviewer;
1060   PetscViewerFormat format;
1061 
1062   PetscFunctionBegin;
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1066   if (iascii) {
1067     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1068     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1069       MatInfo    info;
1070       PetscTruth inodes;
1071 
1072       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1073       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1074       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1075       if (!inodes) {
1076         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1077 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1078       } else {
1079         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1080 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1081       }
1082       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1083       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1084       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1085       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1086       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1087       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1088       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1089       PetscFunctionReturn(0);
1090     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1091       PetscInt   inodecount,inodelimit,*inodes;
1092       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1093       if (inodes) {
1094         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1095       } else {
1096         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1097       }
1098       PetscFunctionReturn(0);
1099     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1100       PetscFunctionReturn(0);
1101     }
1102   } else if (isbinary) {
1103     if (size == 1) {
1104       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106     } else {
1107       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1108     }
1109     PetscFunctionReturn(0);
1110   } else if (isdraw) {
1111     PetscDraw  draw;
1112     PetscTruth isnull;
1113     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1114     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1115   }
1116 
1117   if (size == 1) {
1118     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1119     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1120   } else {
1121     /* assemble the entire matrix onto first processor. */
1122     Mat         A;
1123     Mat_SeqAIJ  *Aloc;
1124     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1125     MatScalar   *a;
1126 
1127     if (mat->rmap->N > 1024) {
1128       PetscTruth flg = PETSC_FALSE;
1129 
1130       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1131       if (!flg) {
1132         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1133       }
1134     }
1135 
1136     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1137     if (!rank) {
1138       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1139     } else {
1140       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1141     }
1142     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1143     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1144     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1145     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1146 
1147     /* copy over the A part */
1148     Aloc = (Mat_SeqAIJ*)aij->A->data;
1149     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1150     row = mat->rmap->rstart;
1151     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1155     }
1156     aj = Aloc->j;
1157     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1158 
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqAIJ*)aij->B->data;
1161     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     row  = mat->rmap->rstart;
1163     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1164     ct   = cols;
1165     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1166     for (i=0; i<m; i++) {
1167       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1168       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1169     }
1170     ierr = PetscFree(ct);CHKERRQ(ierr);
1171     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     /*
1174        Everyone has to call to draw the matrix since the graphics waits are
1175        synchronized across all processors that share the PetscDraw object
1176     */
1177     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1178     if (!rank) {
1179       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1180       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1181     }
1182     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1183     ierr = MatDestroy(A);CHKERRQ(ierr);
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIAIJ"
1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1191 {
1192   PetscErrorCode ierr;
1193   PetscTruth     iascii,isdraw,issocket,isbinary;
1194 
1195   PetscFunctionBegin;
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1200   if (iascii || isdraw || isbinary || issocket) {
1201     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1202   } else {
1203     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1204   }
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatSOR_MPIAIJ"
1210 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1211 {
1212   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1213   PetscErrorCode ierr;
1214   Vec            bb1 = 0;
1215   PetscTruth     hasop;
1216 
1217   PetscFunctionBegin;
1218   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1219     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1220   }
1221 
1222   if (flag == SOR_APPLY_UPPER) {
1223     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1224     PetscFunctionReturn(0);
1225   }
1226 
1227   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1228     if (flag & SOR_ZERO_INITIAL_GUESS) {
1229       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1230       its--;
1231     }
1232 
1233     while (its--) {
1234       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236 
1237       /* update rhs: bb1 = bb - B*x */
1238       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1239       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1240 
1241       /* local sweep */
1242       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1243     }
1244   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1245     if (flag & SOR_ZERO_INITIAL_GUESS) {
1246       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1247       its--;
1248     }
1249     while (its--) {
1250       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1252 
1253       /* update rhs: bb1 = bb - B*x */
1254       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1255       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1256 
1257       /* local sweep */
1258       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1259     }
1260   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1261     if (flag & SOR_ZERO_INITIAL_GUESS) {
1262       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1263       its--;
1264     }
1265     while (its--) {
1266       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268 
1269       /* update rhs: bb1 = bb - B*x */
1270       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1271       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1272 
1273       /* local sweep */
1274       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1275     }
1276   }  else if (flag & SOR_EISENSTAT) {
1277     Vec         xx1;
1278 
1279     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1280     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1281 
1282     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284     if (!mat->diag) {
1285       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1286       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1287     }
1288     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1289     if (hasop) {
1290       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1291     } else {
1292       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1293     }
1294     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1295 
1296     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1297 
1298     /* local sweep */
1299     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1300     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1301     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1302   } else {
1303     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1304   }
1305 
1306   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatPermute_MPIAIJ"
1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1313 {
1314   MPI_Comm       comm,pcomm;
1315   PetscInt       first,local_size,nrows;
1316   const PetscInt *rows;
1317   PetscMPIInt    size;
1318   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1319   PetscErrorCode ierr;
1320 
1321   PetscFunctionBegin;
1322   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1323   /* make a collective version of 'rowp' */
1324   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1325   if (pcomm==comm) {
1326     crowp = rowp;
1327   } else {
1328     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1329     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1330     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1331     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1332   }
1333   /* collect the global row permutation and invert it */
1334   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1335   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1336   if (pcomm!=comm) {
1337     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1338   }
1339   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1340   /* get the local target indices */
1341   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1342   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1344   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1345   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1346   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1347   /* the column permutation is so much easier;
1348      make a local version of 'colp' and invert it */
1349   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1350   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1351   if (size==1) {
1352     lcolp = colp;
1353   } else {
1354     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1355     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1357   }
1358   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1359   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1360   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1361   if (size>1) {
1362     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1363     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1364   }
1365   /* now we just get the submatrix */
1366   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1367   /* clean up */
1368   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1369   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1376 {
1377   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1378   Mat            A = mat->A,B = mat->B;
1379   PetscErrorCode ierr;
1380   PetscReal      isend[5],irecv[5];
1381 
1382   PetscFunctionBegin;
1383   info->block_size     = 1.0;
1384   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1386   isend[3] = info->memory;  isend[4] = info->mallocs;
1387   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1388   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1389   isend[3] += info->memory;  isend[4] += info->mallocs;
1390   if (flag == MAT_LOCAL) {
1391     info->nz_used      = isend[0];
1392     info->nz_allocated = isend[1];
1393     info->nz_unneeded  = isend[2];
1394     info->memory       = isend[3];
1395     info->mallocs      = isend[4];
1396   } else if (flag == MAT_GLOBAL_MAX) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else if (flag == MAT_GLOBAL_SUM) {
1404     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1405     info->nz_used      = irecv[0];
1406     info->nz_allocated = irecv[1];
1407     info->nz_unneeded  = irecv[2];
1408     info->memory       = irecv[3];
1409     info->mallocs      = irecv[4];
1410   }
1411   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1412   info->fill_ratio_needed = 0;
1413   info->factor_mallocs    = 0;
1414 
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatSetOption_MPIAIJ"
1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1421 {
1422   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1423   PetscErrorCode ierr;
1424 
1425   PetscFunctionBegin;
1426   switch (op) {
1427   case MAT_NEW_NONZERO_LOCATIONS:
1428   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1429   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1430   case MAT_KEEP_NONZERO_PATTERN:
1431   case MAT_NEW_NONZERO_LOCATION_ERR:
1432   case MAT_USE_INODES:
1433   case MAT_IGNORE_ZERO_ENTRIES:
1434     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1435     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1436     break;
1437   case MAT_ROW_ORIENTED:
1438     a->roworiented = flg;
1439     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1440     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1441     break;
1442   case MAT_NEW_DIAGONALS:
1443     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1444     break;
1445   case MAT_IGNORE_OFF_PROC_ENTRIES:
1446     a->donotstash = PETSC_TRUE;
1447     break;
1448   case MAT_SYMMETRIC:
1449     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1450     break;
1451   case MAT_STRUCTURALLY_SYMMETRIC:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_HERMITIAN:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   case MAT_SYMMETRY_ETERNAL:
1458     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1459     break;
1460   default:
1461     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatGetRow_MPIAIJ"
1468 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1469 {
1470   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1471   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1472   PetscErrorCode ierr;
1473   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1474   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1475   PetscInt       *cmap,*idx_p;
1476 
1477   PetscFunctionBegin;
1478   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1479   mat->getrowactive = PETSC_TRUE;
1480 
1481   if (!mat->rowvalues && (idx || v)) {
1482     /*
1483         allocate enough space to hold information from the longest row.
1484     */
1485     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1486     PetscInt     max = 1,tmp;
1487     for (i=0; i<matin->rmap->n; i++) {
1488       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1489       if (max < tmp) { max = tmp; }
1490     }
1491     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1492   }
1493 
1494   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1495   lrow = row - rstart;
1496 
1497   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1498   if (!v)   {pvA = 0; pvB = 0;}
1499   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1500   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1501   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1502   nztot = nzA + nzB;
1503 
1504   cmap  = mat->garray;
1505   if (v  || idx) {
1506     if (nztot) {
1507       /* Sort by increasing column numbers, assuming A and B already sorted */
1508       PetscInt imark = -1;
1509       if (v) {
1510         *v = v_p = mat->rowvalues;
1511         for (i=0; i<nzB; i++) {
1512           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1513           else break;
1514         }
1515         imark = i;
1516         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1517         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1518       }
1519       if (idx) {
1520         *idx = idx_p = mat->rowindices;
1521         if (imark > -1) {
1522           for (i=0; i<imark; i++) {
1523             idx_p[i] = cmap[cworkB[i]];
1524           }
1525         } else {
1526           for (i=0; i<nzB; i++) {
1527             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1528             else break;
1529           }
1530           imark = i;
1531         }
1532         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1533         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1534       }
1535     } else {
1536       if (idx) *idx = 0;
1537       if (v)   *v   = 0;
1538     }
1539   }
1540   *nz = nztot;
1541   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1542   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1548 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1549 {
1550   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1551 
1552   PetscFunctionBegin;
1553   if (!aij->getrowactive) {
1554     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1555   }
1556   aij->getrowactive = PETSC_FALSE;
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatNorm_MPIAIJ"
1562 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1563 {
1564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1565   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1566   PetscErrorCode ierr;
1567   PetscInt       i,j,cstart = mat->cmap->rstart;
1568   PetscReal      sum = 0.0;
1569   MatScalar      *v;
1570 
1571   PetscFunctionBegin;
1572   if (aij->size == 1) {
1573     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1574   } else {
1575     if (type == NORM_FROBENIUS) {
1576       v = amat->a;
1577       for (i=0; i<amat->nz; i++) {
1578 #if defined(PETSC_USE_COMPLEX)
1579         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1580 #else
1581         sum += (*v)*(*v); v++;
1582 #endif
1583       }
1584       v = bmat->a;
1585       for (i=0; i<bmat->nz; i++) {
1586 #if defined(PETSC_USE_COMPLEX)
1587         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1588 #else
1589         sum += (*v)*(*v); v++;
1590 #endif
1591       }
1592       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1593       *norm = sqrt(*norm);
1594     } else if (type == NORM_1) { /* max column norm */
1595       PetscReal *tmp,*tmp2;
1596       PetscInt  *jj,*garray = aij->garray;
1597       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1598       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1599       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1600       *norm = 0.0;
1601       v = amat->a; jj = amat->j;
1602       for (j=0; j<amat->nz; j++) {
1603         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1604       }
1605       v = bmat->a; jj = bmat->j;
1606       for (j=0; j<bmat->nz; j++) {
1607         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1608       }
1609       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1610       for (j=0; j<mat->cmap->N; j++) {
1611         if (tmp2[j] > *norm) *norm = tmp2[j];
1612       }
1613       ierr = PetscFree(tmp);CHKERRQ(ierr);
1614       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1615     } else if (type == NORM_INFINITY) { /* max row norm */
1616       PetscReal ntemp = 0.0;
1617       for (j=0; j<aij->A->rmap->n; j++) {
1618         v = amat->a + amat->i[j];
1619         sum = 0.0;
1620         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1621           sum += PetscAbsScalar(*v); v++;
1622         }
1623         v = bmat->a + bmat->i[j];
1624         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1625           sum += PetscAbsScalar(*v); v++;
1626         }
1627         if (sum > ntemp) ntemp = sum;
1628       }
1629       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1630     } else {
1631       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1632     }
1633   }
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatTranspose_MPIAIJ"
1639 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1640 {
1641   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1642   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1643   PetscErrorCode ierr;
1644   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1645   PetscInt       cstart=A->cmap->rstart,ncol;
1646   Mat            B;
1647   MatScalar      *array;
1648 
1649   PetscFunctionBegin;
1650   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1651 
1652   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1653   ai = Aloc->i; aj = Aloc->j;
1654   bi = Bloc->i; bj = Bloc->j;
1655   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1656     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1657     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1658     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1659     for (i=0; i<ai[ma]; i++){
1660       d_nnz[aj[i]] ++;
1661       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1662     }
1663 
1664     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1665     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1666     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1667     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1668     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1669   } else {
1670     B = *matout;
1671   }
1672 
1673   /* copy over the A part */
1674   array = Aloc->a;
1675   row = A->rmap->rstart;
1676   for (i=0; i<ma; i++) {
1677     ncol = ai[i+1]-ai[i];
1678     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1679     row++; array += ncol; aj += ncol;
1680   }
1681   aj = Aloc->j;
1682   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1683 
1684   /* copy over the B part */
1685   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1686   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1687   array = Bloc->a;
1688   row = A->rmap->rstart;
1689   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1690   cols_tmp = cols;
1691   for (i=0; i<mb; i++) {
1692     ncol = bi[i+1]-bi[i];
1693     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1694     row++; array += ncol; cols_tmp += ncol;
1695   }
1696   ierr = PetscFree(cols);CHKERRQ(ierr);
1697 
1698   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1701     *matout = B;
1702   } else {
1703     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1710 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1711 {
1712   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1713   Mat            a = aij->A,b = aij->B;
1714   PetscErrorCode ierr;
1715   PetscInt       s1,s2,s3;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1719   if (rr) {
1720     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1721     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1722     /* Overlap communication with computation. */
1723     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1724   }
1725   if (ll) {
1726     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1727     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1728     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1729   }
1730   /* scale  the diagonal block */
1731   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1732 
1733   if (rr) {
1734     /* Do a scatter end and then right scale the off-diagonal block */
1735     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1736     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1737   }
1738 
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1744 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1745 {
1746   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1747   PetscErrorCode ierr;
1748 
1749   PetscFunctionBegin;
1750   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1751   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1752   PetscFunctionReturn(0);
1753 }
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1756 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1757 {
1758   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1759   PetscErrorCode ierr;
1760 
1761   PetscFunctionBegin;
1762   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatEqual_MPIAIJ"
1768 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1769 {
1770   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1771   Mat            a,b,c,d;
1772   PetscTruth     flg;
1773   PetscErrorCode ierr;
1774 
1775   PetscFunctionBegin;
1776   a = matA->A; b = matA->B;
1777   c = matB->A; d = matB->B;
1778 
1779   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1780   if (flg) {
1781     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1782   }
1783   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 #undef __FUNCT__
1788 #define __FUNCT__ "MatCopy_MPIAIJ"
1789 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1790 {
1791   PetscErrorCode ierr;
1792   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1793   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1794 
1795   PetscFunctionBegin;
1796   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1797   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1798     /* because of the column compression in the off-processor part of the matrix a->B,
1799        the number of columns in a->B and b->B may be different, hence we cannot call
1800        the MatCopy() directly on the two parts. If need be, we can provide a more
1801        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1802        then copying the submatrices */
1803     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1804   } else {
1805     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1806     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1807   }
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1813 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1814 {
1815   PetscErrorCode ierr;
1816 
1817   PetscFunctionBegin;
1818   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1819   PetscFunctionReturn(0);
1820 }
1821 
1822 #include "petscblaslapack.h"
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatAXPY_MPIAIJ"
1825 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1826 {
1827   PetscErrorCode ierr;
1828   PetscInt       i;
1829   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1830   PetscBLASInt   bnz,one=1;
1831   Mat_SeqAIJ     *x,*y;
1832 
1833   PetscFunctionBegin;
1834   if (str == SAME_NONZERO_PATTERN) {
1835     PetscScalar alpha = a;
1836     x = (Mat_SeqAIJ *)xx->A->data;
1837     y = (Mat_SeqAIJ *)yy->A->data;
1838     bnz = PetscBLASIntCast(x->nz);
1839     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1840     x = (Mat_SeqAIJ *)xx->B->data;
1841     y = (Mat_SeqAIJ *)yy->B->data;
1842     bnz = PetscBLASIntCast(x->nz);
1843     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1844   } else if (str == SUBSET_NONZERO_PATTERN) {
1845     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1846 
1847     x = (Mat_SeqAIJ *)xx->B->data;
1848     y = (Mat_SeqAIJ *)yy->B->data;
1849     if (y->xtoy && y->XtoY != xx->B) {
1850       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1851       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1852     }
1853     if (!y->xtoy) { /* get xtoy */
1854       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1855       y->XtoY = xx->B;
1856       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1857     }
1858     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1859   } else {
1860     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1861   }
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1866 
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatConjugate_MPIAIJ"
1869 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1870 {
1871 #if defined(PETSC_USE_COMPLEX)
1872   PetscErrorCode ierr;
1873   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1874 
1875   PetscFunctionBegin;
1876   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1877   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1878 #else
1879   PetscFunctionBegin;
1880 #endif
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatRealPart_MPIAIJ"
1886 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1887 {
1888   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1889   PetscErrorCode ierr;
1890 
1891   PetscFunctionBegin;
1892   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1893   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1899 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1900 {
1901   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1906   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #ifdef PETSC_HAVE_PBGL
1911 
1912 #include <boost/parallel/mpi/bsp_process_group.hpp>
1913 #include <boost/graph/distributed/ilu_default_graph.hpp>
1914 #include <boost/graph/distributed/ilu_0_block.hpp>
1915 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1916 #include <boost/graph/distributed/petsc/interface.hpp>
1917 #include <boost/multi_array.hpp>
1918 #include <boost/parallel/distributed_property_map->hpp>
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1922 /*
1923   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1924 */
1925 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1926 {
1927   namespace petsc = boost::distributed::petsc;
1928 
1929   namespace graph_dist = boost::graph::distributed;
1930   using boost::graph::distributed::ilu_default::process_group_type;
1931   using boost::graph::ilu_permuted;
1932 
1933   PetscTruth      row_identity, col_identity;
1934   PetscContainer  c;
1935   PetscInt        m, n, M, N;
1936   PetscErrorCode  ierr;
1937 
1938   PetscFunctionBegin;
1939   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1940   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1941   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1942   if (!row_identity || !col_identity) {
1943     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1944   }
1945 
1946   process_group_type pg;
1947   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1948   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1949   lgraph_type&   level_graph = *lgraph_p;
1950   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1951 
1952   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1953   ilu_permuted(level_graph);
1954 
1955   /* put together the new matrix */
1956   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1957   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1958   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1959   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1960   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1961   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1962   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1963 
1964   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1965   ierr = PetscContainerSetPointer(c, lgraph_p);
1966   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1972 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1973 {
1974   PetscFunctionBegin;
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 #undef __FUNCT__
1979 #define __FUNCT__ "MatSolve_MPIAIJ"
1980 /*
1981   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1982 */
1983 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1984 {
1985   namespace graph_dist = boost::graph::distributed;
1986 
1987   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1988   lgraph_type*   lgraph_p;
1989   PetscContainer c;
1990   PetscErrorCode ierr;
1991 
1992   PetscFunctionBegin;
1993   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1994   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1995   ierr = VecCopy(b, x);CHKERRQ(ierr);
1996 
1997   PetscScalar* array_x;
1998   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1999   PetscInt sx;
2000   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2001 
2002   PetscScalar* array_b;
2003   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2004   PetscInt sb;
2005   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2006 
2007   lgraph_type&   level_graph = *lgraph_p;
2008   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2009 
2010   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2011   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2012                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2013 
2014   typedef boost::iterator_property_map<array_ref_type::iterator,
2015                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2016   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2017                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2018 
2019   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2020 
2021   PetscFunctionReturn(0);
2022 }
2023 #endif
2024 
2025 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2026   PetscInt       nzlocal,nsends,nrecvs;
2027   PetscMPIInt    *send_rank,*recv_rank;
2028   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2029   PetscScalar    *sbuf_a,**rbuf_a;
2030   PetscErrorCode (*MatDestroy)(Mat);
2031 } Mat_Redundant;
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2035 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2036 {
2037   PetscErrorCode       ierr;
2038   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2039   PetscInt             i;
2040 
2041   PetscFunctionBegin;
2042   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2043   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2044   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2045   for (i=0; i<redund->nrecvs; i++){
2046     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2047     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2048   }
2049   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2050   ierr = PetscFree(redund);CHKERRQ(ierr);
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 #undef __FUNCT__
2055 #define __FUNCT__ "MatDestroy_MatRedundant"
2056 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2057 {
2058   PetscErrorCode  ierr;
2059   PetscContainer  container;
2060   Mat_Redundant   *redund=PETSC_NULL;
2061 
2062   PetscFunctionBegin;
2063   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2064   if (container) {
2065     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2066   } else {
2067     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2068   }
2069   A->ops->destroy = redund->MatDestroy;
2070   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2071   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2072   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2078 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2079 {
2080   PetscMPIInt    rank,size;
2081   MPI_Comm       comm=((PetscObject)mat)->comm;
2082   PetscErrorCode ierr;
2083   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2084   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2085   PetscInt       *rowrange=mat->rmap->range;
2086   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2087   Mat            A=aij->A,B=aij->B,C=*matredundant;
2088   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2089   PetscScalar    *sbuf_a;
2090   PetscInt       nzlocal=a->nz+b->nz;
2091   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2092   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2093   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2094   MatScalar      *aworkA,*aworkB;
2095   PetscScalar    *vals;
2096   PetscMPIInt    tag1,tag2,tag3,imdex;
2097   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2098                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2099   MPI_Status     recv_status,*send_status;
2100   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2101   PetscInt       **rbuf_j=PETSC_NULL;
2102   PetscScalar    **rbuf_a=PETSC_NULL;
2103   Mat_Redundant  *redund=PETSC_NULL;
2104   PetscContainer container;
2105 
2106   PetscFunctionBegin;
2107   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2108   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2109 
2110   if (reuse == MAT_REUSE_MATRIX) {
2111     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2112     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2113     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2114     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2115     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2116     if (container) {
2117       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2118     } else {
2119       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2120     }
2121     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2122 
2123     nsends    = redund->nsends;
2124     nrecvs    = redund->nrecvs;
2125     send_rank = redund->send_rank;
2126     recv_rank = redund->recv_rank;
2127     sbuf_nz   = redund->sbuf_nz;
2128     rbuf_nz   = redund->rbuf_nz;
2129     sbuf_j    = redund->sbuf_j;
2130     sbuf_a    = redund->sbuf_a;
2131     rbuf_j    = redund->rbuf_j;
2132     rbuf_a    = redund->rbuf_a;
2133   }
2134 
2135   if (reuse == MAT_INITIAL_MATRIX){
2136     PetscMPIInt  subrank,subsize;
2137     PetscInt     nleftover,np_subcomm;
2138     /* get the destination processors' id send_rank, nsends and nrecvs */
2139     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2140     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2141     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2142     np_subcomm = size/nsubcomm;
2143     nleftover  = size - nsubcomm*np_subcomm;
2144     nsends = 0; nrecvs = 0;
2145     for (i=0; i<size; i++){ /* i=rank*/
2146       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2147         send_rank[nsends] = i; nsends++;
2148         recv_rank[nrecvs++] = i;
2149       }
2150     }
2151     if (rank >= size - nleftover){/* this proc is a leftover processor */
2152       i = size-nleftover-1;
2153       j = 0;
2154       while (j < nsubcomm - nleftover){
2155         send_rank[nsends++] = i;
2156         i--; j++;
2157       }
2158     }
2159 
2160     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2161       for (i=0; i<nleftover; i++){
2162         recv_rank[nrecvs++] = size-nleftover+i;
2163       }
2164     }
2165 
2166     /* allocate sbuf_j, sbuf_a */
2167     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2168     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2169     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2170   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2171 
2172   /* copy mat's local entries into the buffers */
2173   if (reuse == MAT_INITIAL_MATRIX){
2174     rownz_max = 0;
2175     rptr = sbuf_j;
2176     cols = sbuf_j + rend-rstart + 1;
2177     vals = sbuf_a;
2178     rptr[0] = 0;
2179     for (i=0; i<rend-rstart; i++){
2180       row = i + rstart;
2181       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2182       ncols  = nzA + nzB;
2183       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2184       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2185       /* load the column indices for this row into cols */
2186       lwrite = 0;
2187       for (l=0; l<nzB; l++) {
2188         if ((ctmp = bmap[cworkB[l]]) < cstart){
2189           vals[lwrite]   = aworkB[l];
2190           cols[lwrite++] = ctmp;
2191         }
2192       }
2193       for (l=0; l<nzA; l++){
2194         vals[lwrite]   = aworkA[l];
2195         cols[lwrite++] = cstart + cworkA[l];
2196       }
2197       for (l=0; l<nzB; l++) {
2198         if ((ctmp = bmap[cworkB[l]]) >= cend){
2199           vals[lwrite]   = aworkB[l];
2200           cols[lwrite++] = ctmp;
2201         }
2202       }
2203       vals += ncols;
2204       cols += ncols;
2205       rptr[i+1] = rptr[i] + ncols;
2206       if (rownz_max < ncols) rownz_max = ncols;
2207     }
2208     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2209   } else { /* only copy matrix values into sbuf_a */
2210     rptr = sbuf_j;
2211     vals = sbuf_a;
2212     rptr[0] = 0;
2213     for (i=0; i<rend-rstart; i++){
2214       row = i + rstart;
2215       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2216       ncols  = nzA + nzB;
2217       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2218       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2219       lwrite = 0;
2220       for (l=0; l<nzB; l++) {
2221         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2222       }
2223       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2224       for (l=0; l<nzB; l++) {
2225         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2226       }
2227       vals += ncols;
2228       rptr[i+1] = rptr[i] + ncols;
2229     }
2230   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2231 
2232   /* send nzlocal to others, and recv other's nzlocal */
2233   /*--------------------------------------------------*/
2234   if (reuse == MAT_INITIAL_MATRIX){
2235     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2236     s_waits2 = s_waits3 + nsends;
2237     s_waits1 = s_waits2 + nsends;
2238     r_waits1 = s_waits1 + nsends;
2239     r_waits2 = r_waits1 + nrecvs;
2240     r_waits3 = r_waits2 + nrecvs;
2241   } else {
2242     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2243     r_waits3 = s_waits3 + nsends;
2244   }
2245 
2246   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2247   if (reuse == MAT_INITIAL_MATRIX){
2248     /* get new tags to keep the communication clean */
2249     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2250     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2251     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2252 
2253     /* post receives of other's nzlocal */
2254     for (i=0; i<nrecvs; i++){
2255       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2256     }
2257     /* send nzlocal to others */
2258     for (i=0; i<nsends; i++){
2259       sbuf_nz[i] = nzlocal;
2260       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2261     }
2262     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2263     count = nrecvs;
2264     while (count) {
2265       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2266       recv_rank[imdex] = recv_status.MPI_SOURCE;
2267       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2268       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2269 
2270       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2271       rbuf_nz[imdex] += i + 2;
2272       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2273       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2274       count--;
2275     }
2276     /* wait on sends of nzlocal */
2277     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2278     /* send mat->i,j to others, and recv from other's */
2279     /*------------------------------------------------*/
2280     for (i=0; i<nsends; i++){
2281       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2282       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2283     }
2284     /* wait on receives of mat->i,j */
2285     /*------------------------------*/
2286     count = nrecvs;
2287     while (count) {
2288       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2289       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2290       count--;
2291     }
2292     /* wait on sends of mat->i,j */
2293     /*---------------------------*/
2294     if (nsends) {
2295       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2296     }
2297   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2298 
2299   /* post receives, send and receive mat->a */
2300   /*----------------------------------------*/
2301   for (imdex=0; imdex<nrecvs; imdex++) {
2302     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2303   }
2304   for (i=0; i<nsends; i++){
2305     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2306   }
2307   count = nrecvs;
2308   while (count) {
2309     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2310     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2311     count--;
2312   }
2313   if (nsends) {
2314     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2315   }
2316 
2317   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2318 
2319   /* create redundant matrix */
2320   /*-------------------------*/
2321   if (reuse == MAT_INITIAL_MATRIX){
2322     /* compute rownz_max for preallocation */
2323     for (imdex=0; imdex<nrecvs; imdex++){
2324       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2325       rptr = rbuf_j[imdex];
2326       for (i=0; i<j; i++){
2327         ncols = rptr[i+1] - rptr[i];
2328         if (rownz_max < ncols) rownz_max = ncols;
2329       }
2330     }
2331 
2332     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2333     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2334     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2335     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2336     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2337   } else {
2338     C = *matredundant;
2339   }
2340 
2341   /* insert local matrix entries */
2342   rptr = sbuf_j;
2343   cols = sbuf_j + rend-rstart + 1;
2344   vals = sbuf_a;
2345   for (i=0; i<rend-rstart; i++){
2346     row   = i + rstart;
2347     ncols = rptr[i+1] - rptr[i];
2348     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2349     vals += ncols;
2350     cols += ncols;
2351   }
2352   /* insert received matrix entries */
2353   for (imdex=0; imdex<nrecvs; imdex++){
2354     rstart = rowrange[recv_rank[imdex]];
2355     rend   = rowrange[recv_rank[imdex]+1];
2356     rptr = rbuf_j[imdex];
2357     cols = rbuf_j[imdex] + rend-rstart + 1;
2358     vals = rbuf_a[imdex];
2359     for (i=0; i<rend-rstart; i++){
2360       row   = i + rstart;
2361       ncols = rptr[i+1] - rptr[i];
2362       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2363       vals += ncols;
2364       cols += ncols;
2365     }
2366   }
2367   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2368   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2369   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2370   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2371   if (reuse == MAT_INITIAL_MATRIX){
2372     PetscContainer container;
2373     *matredundant = C;
2374     /* create a supporting struct and attach it to C for reuse */
2375     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2376     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2377     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2378     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2379     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2380 
2381     redund->nzlocal = nzlocal;
2382     redund->nsends  = nsends;
2383     redund->nrecvs  = nrecvs;
2384     redund->send_rank = send_rank;
2385     redund->recv_rank = recv_rank;
2386     redund->sbuf_nz = sbuf_nz;
2387     redund->rbuf_nz = rbuf_nz;
2388     redund->sbuf_j  = sbuf_j;
2389     redund->sbuf_a  = sbuf_a;
2390     redund->rbuf_j  = rbuf_j;
2391     redund->rbuf_a  = rbuf_a;
2392 
2393     redund->MatDestroy = C->ops->destroy;
2394     C->ops->destroy    = MatDestroy_MatRedundant;
2395   }
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2401 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2402 {
2403   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2404   PetscErrorCode ierr;
2405   PetscInt       i,*idxb = 0;
2406   PetscScalar    *va,*vb;
2407   Vec            vtmp;
2408 
2409   PetscFunctionBegin;
2410   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2411   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2412   if (idx) {
2413     for (i=0; i<A->rmap->n; i++) {
2414       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2415     }
2416   }
2417 
2418   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2419   if (idx) {
2420     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2421   }
2422   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2423   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2424 
2425   for (i=0; i<A->rmap->n; i++){
2426     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2427       va[i] = vb[i];
2428       if (idx) idx[i] = a->garray[idxb[i]];
2429     }
2430   }
2431 
2432   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2433   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2434   if (idxb) {
2435     ierr = PetscFree(idxb);CHKERRQ(ierr);
2436   }
2437   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 #undef __FUNCT__
2442 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2443 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2444 {
2445   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2446   PetscErrorCode ierr;
2447   PetscInt       i,*idxb = 0;
2448   PetscScalar    *va,*vb;
2449   Vec            vtmp;
2450 
2451   PetscFunctionBegin;
2452   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2453   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2454   if (idx) {
2455     for (i=0; i<A->cmap->n; i++) {
2456       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2457     }
2458   }
2459 
2460   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2461   if (idx) {
2462     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2463   }
2464   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2465   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2466 
2467   for (i=0; i<A->rmap->n; i++){
2468     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2469       va[i] = vb[i];
2470       if (idx) idx[i] = a->garray[idxb[i]];
2471     }
2472   }
2473 
2474   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2475   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2476   if (idxb) {
2477     ierr = PetscFree(idxb);CHKERRQ(ierr);
2478   }
2479   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2485 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2486 {
2487   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2488   PetscInt       n      = A->rmap->n;
2489   PetscInt       cstart = A->cmap->rstart;
2490   PetscInt      *cmap   = mat->garray;
2491   PetscInt      *diagIdx, *offdiagIdx;
2492   Vec            diagV, offdiagV;
2493   PetscScalar   *a, *diagA, *offdiagA;
2494   PetscInt       r;
2495   PetscErrorCode ierr;
2496 
2497   PetscFunctionBegin;
2498   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2499   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2500   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2501   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2502   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2503   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2504   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2505   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2506   for(r = 0; r < n; ++r) {
2507     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2508       a[r]   = diagA[r];
2509       idx[r] = cstart + diagIdx[r];
2510     } else {
2511       a[r]   = offdiagA[r];
2512       idx[r] = cmap[offdiagIdx[r]];
2513     }
2514   }
2515   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2516   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2517   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2518   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2519   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2520   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2526 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2527 {
2528   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2529   PetscInt       n      = A->rmap->n;
2530   PetscInt       cstart = A->cmap->rstart;
2531   PetscInt      *cmap   = mat->garray;
2532   PetscInt      *diagIdx, *offdiagIdx;
2533   Vec            diagV, offdiagV;
2534   PetscScalar   *a, *diagA, *offdiagA;
2535   PetscInt       r;
2536   PetscErrorCode ierr;
2537 
2538   PetscFunctionBegin;
2539   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2540   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2541   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2542   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2543   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2544   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2545   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2546   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2547   for(r = 0; r < n; ++r) {
2548     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2549       a[r]   = diagA[r];
2550       idx[r] = cstart + diagIdx[r];
2551     } else {
2552       a[r]   = offdiagA[r];
2553       idx[r] = cmap[offdiagIdx[r]];
2554     }
2555   }
2556   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2557   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2558   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2559   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2560   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2561   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2567 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2568 {
2569   PetscErrorCode ierr;
2570   Mat            *dummy;
2571 
2572   PetscFunctionBegin;
2573   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2574   *newmat = *dummy;
2575   ierr = PetscFree(dummy);CHKERRQ(ierr);
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2580 /* -------------------------------------------------------------------*/
2581 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2582        MatGetRow_MPIAIJ,
2583        MatRestoreRow_MPIAIJ,
2584        MatMult_MPIAIJ,
2585 /* 4*/ MatMultAdd_MPIAIJ,
2586        MatMultTranspose_MPIAIJ,
2587        MatMultTransposeAdd_MPIAIJ,
2588 #ifdef PETSC_HAVE_PBGL
2589        MatSolve_MPIAIJ,
2590 #else
2591        0,
2592 #endif
2593        0,
2594        0,
2595 /*10*/ 0,
2596        0,
2597        0,
2598        MatSOR_MPIAIJ,
2599        MatTranspose_MPIAIJ,
2600 /*15*/ MatGetInfo_MPIAIJ,
2601        MatEqual_MPIAIJ,
2602        MatGetDiagonal_MPIAIJ,
2603        MatDiagonalScale_MPIAIJ,
2604        MatNorm_MPIAIJ,
2605 /*20*/ MatAssemblyBegin_MPIAIJ,
2606        MatAssemblyEnd_MPIAIJ,
2607        MatSetOption_MPIAIJ,
2608        MatZeroEntries_MPIAIJ,
2609 /*24*/ MatZeroRows_MPIAIJ,
2610        0,
2611 #ifdef PETSC_HAVE_PBGL
2612        0,
2613 #else
2614        0,
2615 #endif
2616        0,
2617        0,
2618 /*29*/ MatSetUpPreallocation_MPIAIJ,
2619 #ifdef PETSC_HAVE_PBGL
2620        0,
2621 #else
2622        0,
2623 #endif
2624        0,
2625        0,
2626        0,
2627 /*34*/ MatDuplicate_MPIAIJ,
2628        0,
2629        0,
2630        0,
2631        0,
2632 /*39*/ MatAXPY_MPIAIJ,
2633        MatGetSubMatrices_MPIAIJ,
2634        MatIncreaseOverlap_MPIAIJ,
2635        MatGetValues_MPIAIJ,
2636        MatCopy_MPIAIJ,
2637 /*44*/ MatGetRowMax_MPIAIJ,
2638        MatScale_MPIAIJ,
2639        0,
2640        0,
2641        0,
2642 /*49*/ MatSetBlockSize_MPIAIJ,
2643        0,
2644        0,
2645        0,
2646        0,
2647 /*54*/ MatFDColoringCreate_MPIAIJ,
2648        0,
2649        MatSetUnfactored_MPIAIJ,
2650        MatPermute_MPIAIJ,
2651        0,
2652 /*59*/ MatGetSubMatrix_MPIAIJ,
2653        MatDestroy_MPIAIJ,
2654        MatView_MPIAIJ,
2655        0,
2656        0,
2657 /*64*/ 0,
2658        0,
2659        0,
2660        0,
2661        0,
2662 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2663        MatGetRowMinAbs_MPIAIJ,
2664        0,
2665        MatSetColoring_MPIAIJ,
2666 #if defined(PETSC_HAVE_ADIC)
2667        MatSetValuesAdic_MPIAIJ,
2668 #else
2669        0,
2670 #endif
2671        MatSetValuesAdifor_MPIAIJ,
2672 /*75*/ MatFDColoringApply_AIJ,
2673        0,
2674        0,
2675        0,
2676        0,
2677 /*80*/ 0,
2678        0,
2679        0,
2680 /*83*/ MatLoad_MPIAIJ,
2681        0,
2682        0,
2683        0,
2684        0,
2685        0,
2686 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2687        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2688        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2689        MatPtAP_Basic,
2690        MatPtAPSymbolic_MPIAIJ,
2691 /*94*/ MatPtAPNumeric_MPIAIJ,
2692        0,
2693        0,
2694        0,
2695        0,
2696 /*99*/ 0,
2697        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2698        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2699        MatConjugate_MPIAIJ,
2700        0,
2701 /*104*/MatSetValuesRow_MPIAIJ,
2702        MatRealPart_MPIAIJ,
2703        MatImaginaryPart_MPIAIJ,
2704        0,
2705        0,
2706 /*109*/0,
2707        MatGetRedundantMatrix_MPIAIJ,
2708        MatGetRowMin_MPIAIJ,
2709        0,
2710        0,
2711 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2712        0,
2713        0,
2714        0,
2715        0,
2716        0
2717 };
2718 
2719 /* ----------------------------------------------------------------------------------------*/
2720 
2721 EXTERN_C_BEGIN
2722 #undef __FUNCT__
2723 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2724 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2725 {
2726   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2727   PetscErrorCode ierr;
2728 
2729   PetscFunctionBegin;
2730   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2731   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2732   PetscFunctionReturn(0);
2733 }
2734 EXTERN_C_END
2735 
2736 EXTERN_C_BEGIN
2737 #undef __FUNCT__
2738 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2739 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2740 {
2741   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2742   PetscErrorCode ierr;
2743 
2744   PetscFunctionBegin;
2745   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2746   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2747   PetscFunctionReturn(0);
2748 }
2749 EXTERN_C_END
2750 
2751 #include "petscpc.h"
2752 EXTERN_C_BEGIN
2753 #undef __FUNCT__
2754 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2755 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2756 {
2757   Mat_MPIAIJ     *b;
2758   PetscErrorCode ierr;
2759   PetscInt       i;
2760 
2761   PetscFunctionBegin;
2762   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2763   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2764   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2765   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2766 
2767   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2768   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2769   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2770   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2771   if (d_nnz) {
2772     for (i=0; i<B->rmap->n; i++) {
2773       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2774     }
2775   }
2776   if (o_nnz) {
2777     for (i=0; i<B->rmap->n; i++) {
2778       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2779     }
2780   }
2781   b = (Mat_MPIAIJ*)B->data;
2782 
2783   if (!B->preallocated) {
2784     /* Explicitly create 2 MATSEQAIJ matrices. */
2785     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2786     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2787     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2788     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2789     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2790     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2791     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2792     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2793   }
2794 
2795   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2796   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2797   B->preallocated = PETSC_TRUE;
2798   PetscFunctionReturn(0);
2799 }
2800 EXTERN_C_END
2801 
2802 #undef __FUNCT__
2803 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2804 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2805 {
2806   Mat            mat;
2807   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2808   PetscErrorCode ierr;
2809 
2810   PetscFunctionBegin;
2811   *newmat       = 0;
2812   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2813   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2814   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2815   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2816   a    = (Mat_MPIAIJ*)mat->data;
2817 
2818   mat->factor       = matin->factor;
2819   mat->rmap->bs      = matin->rmap->bs;
2820   mat->assembled    = PETSC_TRUE;
2821   mat->insertmode   = NOT_SET_VALUES;
2822   mat->preallocated = PETSC_TRUE;
2823 
2824   a->size           = oldmat->size;
2825   a->rank           = oldmat->rank;
2826   a->donotstash     = oldmat->donotstash;
2827   a->roworiented    = oldmat->roworiented;
2828   a->rowindices     = 0;
2829   a->rowvalues      = 0;
2830   a->getrowactive   = PETSC_FALSE;
2831 
2832   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2833   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2834 
2835   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2836   if (oldmat->colmap) {
2837 #if defined (PETSC_USE_CTABLE)
2838     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2839 #else
2840     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2841     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2842     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2843 #endif
2844   } else a->colmap = 0;
2845   if (oldmat->garray) {
2846     PetscInt len;
2847     len  = oldmat->B->cmap->n;
2848     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2849     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2850     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2851   } else a->garray = 0;
2852 
2853   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2854   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2855   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2856   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2857   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2858   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2859   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2860   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2861   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2862   *newmat = mat;
2863   PetscFunctionReturn(0);
2864 }
2865 
2866 #include "petscsys.h"
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatLoad_MPIAIJ"
2870 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2871 {
2872   Mat            A;
2873   PetscScalar    *vals,*svals;
2874   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2875   MPI_Status     status;
2876   PetscErrorCode ierr;
2877   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2878   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2879   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2880   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2881   PetscInt       cend,cstart,n,*rowners;
2882   int            fd;
2883 
2884   PetscFunctionBegin;
2885   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2886   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2887   if (!rank) {
2888     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2889     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2890     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2891   }
2892 
2893   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2894   M = header[1]; N = header[2];
2895   /* determine ownership of all rows */
2896   m    = M/size + ((M % size) > rank);
2897   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2898   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2899 
2900   /* First process needs enough room for process with most rows */
2901   if (!rank) {
2902     mmax       = rowners[1];
2903     for (i=2; i<size; i++) {
2904       mmax = PetscMax(mmax,rowners[i]);
2905     }
2906   } else mmax = m;
2907 
2908   rowners[0] = 0;
2909   for (i=2; i<=size; i++) {
2910     rowners[i] += rowners[i-1];
2911   }
2912   rstart = rowners[rank];
2913   rend   = rowners[rank+1];
2914 
2915   /* distribute row lengths to all processors */
2916   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2917   if (!rank) {
2918     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2919     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2920     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2921     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2922     for (j=0; j<m; j++) {
2923       procsnz[0] += ourlens[j];
2924     }
2925     for (i=1; i<size; i++) {
2926       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2927       /* calculate the number of nonzeros on each processor */
2928       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2929         procsnz[i] += rowlengths[j];
2930       }
2931       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2932       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2933     }
2934     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2935   } else {
2936     mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr);
2937     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2938   }
2939 
2940   if (!rank) {
2941     /* determine max buffer needed and allocate it */
2942     maxnz = 0;
2943     for (i=0; i<size; i++) {
2944       maxnz = PetscMax(maxnz,procsnz[i]);
2945     }
2946     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2947 
2948     /* read in my part of the matrix column indices  */
2949     nz   = procsnz[0];
2950     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2951     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2952 
2953     /* read in every one elses and ship off */
2954     for (i=1; i<size; i++) {
2955       nz     = procsnz[i];
2956       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2957       mpicnt = PetscMPIIntCast(nz);
2958       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2959     }
2960     ierr = PetscFree(cols);CHKERRQ(ierr);
2961   } else {
2962     /* determine buffer space needed for message */
2963     nz = 0;
2964     for (i=0; i<m; i++) {
2965       nz += ourlens[i];
2966     }
2967     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2968 
2969     /* receive message of column indices*/
2970     mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr);
2971     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2972     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2973     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2974     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2975     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2976   }
2977 
2978   /* determine column ownership if matrix is not square */
2979   if (N != M) {
2980     n      = N/size + ((N % size) > rank);
2981     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2982     cstart = cend - n;
2983   } else {
2984     cstart = rstart;
2985     cend   = rend;
2986     n      = cend - cstart;
2987   }
2988 
2989   /* loop over local rows, determining number of off diagonal entries */
2990   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2991   jj = 0;
2992   for (i=0; i<m; i++) {
2993     for (j=0; j<ourlens[i]; j++) {
2994       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2995       jj++;
2996     }
2997   }
2998 
2999   /* create our matrix */
3000   for (i=0; i<m; i++) {
3001     ourlens[i] -= offlens[i];
3002   }
3003   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3004   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
3005   ierr = MatSetType(A,type);CHKERRQ(ierr);
3006   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3007 
3008   for (i=0; i<m; i++) {
3009     ourlens[i] += offlens[i];
3010   }
3011 
3012   if (!rank) {
3013     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3014 
3015     /* read in my part of the matrix numerical values  */
3016     nz   = procsnz[0];
3017     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3018 
3019     /* insert into matrix */
3020     jj      = rstart;
3021     smycols = mycols;
3022     svals   = vals;
3023     for (i=0; i<m; i++) {
3024       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3025       smycols += ourlens[i];
3026       svals   += ourlens[i];
3027       jj++;
3028     }
3029 
3030     /* read in other processors and ship out */
3031     for (i=1; i<size; i++) {
3032       nz     = procsnz[i];
3033       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3034       mpicnt = PetscMPIIntCast(nz);
3035       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3036     }
3037     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3038   } else {
3039     /* receive numeric values */
3040     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3041 
3042     /* receive message of values*/
3043     mpicnt = PetscMPIIntCast(nz);
3044     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3045     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3046     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3047     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3048     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3049 
3050     /* insert into matrix */
3051     jj      = rstart;
3052     smycols = mycols;
3053     svals   = vals;
3054     for (i=0; i<m; i++) {
3055       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3056       smycols += ourlens[i];
3057       svals   += ourlens[i];
3058       jj++;
3059     }
3060   }
3061   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3062   ierr = PetscFree(vals);CHKERRQ(ierr);
3063   ierr = PetscFree(mycols);CHKERRQ(ierr);
3064   ierr = PetscFree(rowners);CHKERRQ(ierr);
3065 
3066   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3067   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3068   *newmat = A;
3069   PetscFunctionReturn(0);
3070 }
3071 
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3074 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3075 {
3076   PetscErrorCode ierr;
3077   IS             iscol_local;
3078   PetscInt       csize;
3079 
3080   PetscFunctionBegin;
3081   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3082   if (call == MAT_REUSE_MATRIX) {
3083     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3084     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3085   } else {
3086     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3087   }
3088   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3089   if (call == MAT_INITIAL_MATRIX) {
3090     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3091     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3092   }
3093   PetscFunctionReturn(0);
3094 }
3095 
3096 #undef __FUNCT__
3097 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3098 /*
3099     Not great since it makes two copies of the submatrix, first an SeqAIJ
3100   in local and then by concatenating the local matrices the end result.
3101   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3102 
3103   Note: This requires a sequential iscol with all indices.
3104 */
3105 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3106 {
3107   PetscErrorCode ierr;
3108   PetscMPIInt    rank,size;
3109   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3110   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3111   Mat            *local,M,Mreuse;
3112   MatScalar      *vwork,*aa;
3113   MPI_Comm       comm = ((PetscObject)mat)->comm;
3114   Mat_SeqAIJ     *aij;
3115 
3116 
3117   PetscFunctionBegin;
3118   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3119   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3120 
3121   if (call ==  MAT_REUSE_MATRIX) {
3122     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3123     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3124     local = &Mreuse;
3125     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3126   } else {
3127     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3128     Mreuse = *local;
3129     ierr   = PetscFree(local);CHKERRQ(ierr);
3130   }
3131 
3132   /*
3133       m - number of local rows
3134       n - number of columns (same on all processors)
3135       rstart - first row in new global matrix generated
3136   */
3137   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3138   if (call == MAT_INITIAL_MATRIX) {
3139     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3140     ii  = aij->i;
3141     jj  = aij->j;
3142 
3143     /*
3144         Determine the number of non-zeros in the diagonal and off-diagonal
3145         portions of the matrix in order to do correct preallocation
3146     */
3147 
3148     /* first get start and end of "diagonal" columns */
3149     if (csize == PETSC_DECIDE) {
3150       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3151       if (mglobal == n) { /* square matrix */
3152 	nlocal = m;
3153       } else {
3154         nlocal = n/size + ((n % size) > rank);
3155       }
3156     } else {
3157       nlocal = csize;
3158     }
3159     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3160     rstart = rend - nlocal;
3161     if (rank == size - 1 && rend != n) {
3162       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3163     }
3164 
3165     /* next, compute all the lengths */
3166     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3167     olens = dlens + m;
3168     for (i=0; i<m; i++) {
3169       jend = ii[i+1] - ii[i];
3170       olen = 0;
3171       dlen = 0;
3172       for (j=0; j<jend; j++) {
3173         if (*jj < rstart || *jj >= rend) olen++;
3174         else dlen++;
3175         jj++;
3176       }
3177       olens[i] = olen;
3178       dlens[i] = dlen;
3179     }
3180     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3181     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3182     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3183     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3184     ierr = PetscFree(dlens);CHKERRQ(ierr);
3185   } else {
3186     PetscInt ml,nl;
3187 
3188     M = *newmat;
3189     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3190     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3191     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3192     /*
3193          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3194        rather than the slower MatSetValues().
3195     */
3196     M->was_assembled = PETSC_TRUE;
3197     M->assembled     = PETSC_FALSE;
3198   }
3199   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3200   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3201   ii  = aij->i;
3202   jj  = aij->j;
3203   aa  = aij->a;
3204   for (i=0; i<m; i++) {
3205     row   = rstart + i;
3206     nz    = ii[i+1] - ii[i];
3207     cwork = jj;     jj += nz;
3208     vwork = aa;     aa += nz;
3209     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3210   }
3211 
3212   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3213   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3214   *newmat = M;
3215 
3216   /* save submatrix used in processor for next request */
3217   if (call ==  MAT_INITIAL_MATRIX) {
3218     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3219     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3220   }
3221 
3222   PetscFunctionReturn(0);
3223 }
3224 
3225 EXTERN_C_BEGIN
3226 #undef __FUNCT__
3227 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3228 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3229 {
3230   PetscInt       m,cstart, cend,j,nnz,i,d;
3231   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3232   const PetscInt *JJ;
3233   PetscScalar    *values;
3234   PetscErrorCode ierr;
3235 
3236   PetscFunctionBegin;
3237   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3238 
3239   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3240   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3241   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3242   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3243   m      = B->rmap->n;
3244   cstart = B->cmap->rstart;
3245   cend   = B->cmap->rend;
3246   rstart = B->rmap->rstart;
3247 
3248   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3249 
3250 #if defined(PETSC_USE_DEBUGGING)
3251   for (i=0; i<m; i++) {
3252     nnz     = Ii[i+1]- Ii[i];
3253     JJ      = J + Ii[i];
3254     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3255     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3256     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3257   }
3258 #endif
3259 
3260   for (i=0; i<m; i++) {
3261     nnz     = Ii[i+1]- Ii[i];
3262     JJ      = J + Ii[i];
3263     nnz_max = PetscMax(nnz_max,nnz);
3264     d       = 0;
3265     for (j=0; j<nnz; j++) {
3266       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3267     }
3268     d_nnz[i] = d;
3269     o_nnz[i] = nnz - d;
3270   }
3271   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3272   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3273 
3274   if (v) values = (PetscScalar*)v;
3275   else {
3276     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3277     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3278   }
3279 
3280   for (i=0; i<m; i++) {
3281     ii   = i + rstart;
3282     nnz  = Ii[i+1]- Ii[i];
3283     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3284   }
3285   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3286   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3287 
3288   if (!v) {
3289     ierr = PetscFree(values);CHKERRQ(ierr);
3290   }
3291   PetscFunctionReturn(0);
3292 }
3293 EXTERN_C_END
3294 
3295 #undef __FUNCT__
3296 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3297 /*@
3298    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3299    (the default parallel PETSc format).
3300 
3301    Collective on MPI_Comm
3302 
3303    Input Parameters:
3304 +  B - the matrix
3305 .  i - the indices into j for the start of each local row (starts with zero)
3306 .  j - the column indices for each local row (starts with zero)
3307 -  v - optional values in the matrix
3308 
3309    Level: developer
3310 
3311    Notes:
3312        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3313      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3314      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3315 
3316        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3317 
3318        The format which is used for the sparse matrix input, is equivalent to a
3319     row-major ordering.. i.e for the following matrix, the input data expected is
3320     as shown:
3321 
3322         1 0 0
3323         2 0 3     P0
3324        -------
3325         4 5 6     P1
3326 
3327      Process0 [P0]: rows_owned=[0,1]
3328         i =  {0,1,3}  [size = nrow+1  = 2+1]
3329         j =  {0,0,2}  [size = nz = 6]
3330         v =  {1,2,3}  [size = nz = 6]
3331 
3332      Process1 [P1]: rows_owned=[2]
3333         i =  {0,3}    [size = nrow+1  = 1+1]
3334         j =  {0,1,2}  [size = nz = 6]
3335         v =  {4,5,6}  [size = nz = 6]
3336 
3337 .keywords: matrix, aij, compressed row, sparse, parallel
3338 
3339 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3340           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3341 @*/
3342 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3343 {
3344   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3345 
3346   PetscFunctionBegin;
3347   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3348   if (f) {
3349     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3350   }
3351   PetscFunctionReturn(0);
3352 }
3353 
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3356 /*@C
3357    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3358    (the default parallel PETSc format).  For good matrix assembly performance
3359    the user should preallocate the matrix storage by setting the parameters
3360    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3361    performance can be increased by more than a factor of 50.
3362 
3363    Collective on MPI_Comm
3364 
3365    Input Parameters:
3366 +  A - the matrix
3367 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3368            (same value is used for all local rows)
3369 .  d_nnz - array containing the number of nonzeros in the various rows of the
3370            DIAGONAL portion of the local submatrix (possibly different for each row)
3371            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3372            The size of this array is equal to the number of local rows, i.e 'm'.
3373            You must leave room for the diagonal entry even if it is zero.
3374 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3375            submatrix (same value is used for all local rows).
3376 -  o_nnz - array containing the number of nonzeros in the various rows of the
3377            OFF-DIAGONAL portion of the local submatrix (possibly different for
3378            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3379            structure. The size of this array is equal to the number
3380            of local rows, i.e 'm'.
3381 
3382    If the *_nnz parameter is given then the *_nz parameter is ignored
3383 
3384    The AIJ format (also called the Yale sparse matrix format or
3385    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3386    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3387 
3388    The parallel matrix is partitioned such that the first m0 rows belong to
3389    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3390    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3391 
3392    The DIAGONAL portion of the local submatrix of a processor can be defined
3393    as the submatrix which is obtained by extraction the part corresponding
3394    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3395    first row that belongs to the processor, and r2 is the last row belonging
3396    to the this processor. This is a square mxm matrix. The remaining portion
3397    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3398 
3399    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3400 
3401    You can call MatGetInfo() to get information on how effective the preallocation was;
3402    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3403    You can also run with the option -info and look for messages with the string
3404    malloc in them to see if additional memory allocation was needed.
3405 
3406    Example usage:
3407 
3408    Consider the following 8x8 matrix with 34 non-zero values, that is
3409    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3410    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3411    as follows:
3412 
3413 .vb
3414             1  2  0  |  0  3  0  |  0  4
3415     Proc0   0  5  6  |  7  0  0  |  8  0
3416             9  0 10  | 11  0  0  | 12  0
3417     -------------------------------------
3418            13  0 14  | 15 16 17  |  0  0
3419     Proc1   0 18  0  | 19 20 21  |  0  0
3420             0  0  0  | 22 23  0  | 24  0
3421     -------------------------------------
3422     Proc2  25 26 27  |  0  0 28  | 29  0
3423            30  0  0  | 31 32 33  |  0 34
3424 .ve
3425 
3426    This can be represented as a collection of submatrices as:
3427 
3428 .vb
3429       A B C
3430       D E F
3431       G H I
3432 .ve
3433 
3434    Where the submatrices A,B,C are owned by proc0, D,E,F are
3435    owned by proc1, G,H,I are owned by proc2.
3436 
3437    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3438    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3439    The 'M','N' parameters are 8,8, and have the same values on all procs.
3440 
3441    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3442    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3443    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3444    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3445    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3446    matrix, ans [DF] as another SeqAIJ matrix.
3447 
3448    When d_nz, o_nz parameters are specified, d_nz storage elements are
3449    allocated for every row of the local diagonal submatrix, and o_nz
3450    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3451    One way to choose d_nz and o_nz is to use the max nonzerors per local
3452    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3453    In this case, the values of d_nz,o_nz are:
3454 .vb
3455      proc0 : dnz = 2, o_nz = 2
3456      proc1 : dnz = 3, o_nz = 2
3457      proc2 : dnz = 1, o_nz = 4
3458 .ve
3459    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3460    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3461    for proc3. i.e we are using 12+15+10=37 storage locations to store
3462    34 values.
3463 
3464    When d_nnz, o_nnz parameters are specified, the storage is specified
3465    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3466    In the above case the values for d_nnz,o_nnz are:
3467 .vb
3468      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3469      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3470      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3471 .ve
3472    Here the space allocated is sum of all the above values i.e 34, and
3473    hence pre-allocation is perfect.
3474 
3475    Level: intermediate
3476 
3477 .keywords: matrix, aij, compressed row, sparse, parallel
3478 
3479 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3480           MPIAIJ, MatGetInfo()
3481 @*/
3482 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3483 {
3484   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3485 
3486   PetscFunctionBegin;
3487   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3488   if (f) {
3489     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3490   }
3491   PetscFunctionReturn(0);
3492 }
3493 
3494 #undef __FUNCT__
3495 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3496 /*@
3497      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3498          CSR format the local rows.
3499 
3500    Collective on MPI_Comm
3501 
3502    Input Parameters:
3503 +  comm - MPI communicator
3504 .  m - number of local rows (Cannot be PETSC_DECIDE)
3505 .  n - This value should be the same as the local size used in creating the
3506        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3507        calculated if N is given) For square matrices n is almost always m.
3508 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3509 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3510 .   i - row indices
3511 .   j - column indices
3512 -   a - matrix values
3513 
3514    Output Parameter:
3515 .   mat - the matrix
3516 
3517    Level: intermediate
3518 
3519    Notes:
3520        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3521      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3522      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3523 
3524        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3525 
3526        The format which is used for the sparse matrix input, is equivalent to a
3527     row-major ordering.. i.e for the following matrix, the input data expected is
3528     as shown:
3529 
3530         1 0 0
3531         2 0 3     P0
3532        -------
3533         4 5 6     P1
3534 
3535      Process0 [P0]: rows_owned=[0,1]
3536         i =  {0,1,3}  [size = nrow+1  = 2+1]
3537         j =  {0,0,2}  [size = nz = 6]
3538         v =  {1,2,3}  [size = nz = 6]
3539 
3540      Process1 [P1]: rows_owned=[2]
3541         i =  {0,3}    [size = nrow+1  = 1+1]
3542         j =  {0,1,2}  [size = nz = 6]
3543         v =  {4,5,6}  [size = nz = 6]
3544 
3545 .keywords: matrix, aij, compressed row, sparse, parallel
3546 
3547 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3548           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3549 @*/
3550 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3551 {
3552   PetscErrorCode ierr;
3553 
3554  PetscFunctionBegin;
3555   if (i[0]) {
3556     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3557   }
3558   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3559   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3560   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3561   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3562   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3563   PetscFunctionReturn(0);
3564 }
3565 
3566 #undef __FUNCT__
3567 #define __FUNCT__ "MatCreateMPIAIJ"
3568 /*@C
3569    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3570    (the default parallel PETSc format).  For good matrix assembly performance
3571    the user should preallocate the matrix storage by setting the parameters
3572    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3573    performance can be increased by more than a factor of 50.
3574 
3575    Collective on MPI_Comm
3576 
3577    Input Parameters:
3578 +  comm - MPI communicator
3579 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3580            This value should be the same as the local size used in creating the
3581            y vector for the matrix-vector product y = Ax.
3582 .  n - This value should be the same as the local size used in creating the
3583        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3584        calculated if N is given) For square matrices n is almost always m.
3585 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3586 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3587 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3588            (same value is used for all local rows)
3589 .  d_nnz - array containing the number of nonzeros in the various rows of the
3590            DIAGONAL portion of the local submatrix (possibly different for each row)
3591            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3592            The size of this array is equal to the number of local rows, i.e 'm'.
3593            You must leave room for the diagonal entry even if it is zero.
3594 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3595            submatrix (same value is used for all local rows).
3596 -  o_nnz - array containing the number of nonzeros in the various rows of the
3597            OFF-DIAGONAL portion of the local submatrix (possibly different for
3598            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3599            structure. The size of this array is equal to the number
3600            of local rows, i.e 'm'.
3601 
3602    Output Parameter:
3603 .  A - the matrix
3604 
3605    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3606    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3607    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3608 
3609    Notes:
3610    If the *_nnz parameter is given then the *_nz parameter is ignored
3611 
3612    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3613    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3614    storage requirements for this matrix.
3615 
3616    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3617    processor than it must be used on all processors that share the object for
3618    that argument.
3619 
3620    The user MUST specify either the local or global matrix dimensions
3621    (possibly both).
3622 
3623    The parallel matrix is partitioned across processors such that the
3624    first m0 rows belong to process 0, the next m1 rows belong to
3625    process 1, the next m2 rows belong to process 2 etc.. where
3626    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3627    values corresponding to [m x N] submatrix.
3628 
3629    The columns are logically partitioned with the n0 columns belonging
3630    to 0th partition, the next n1 columns belonging to the next
3631    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3632 
3633    The DIAGONAL portion of the local submatrix on any given processor
3634    is the submatrix corresponding to the rows and columns m,n
3635    corresponding to the given processor. i.e diagonal matrix on
3636    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3637    etc. The remaining portion of the local submatrix [m x (N-n)]
3638    constitute the OFF-DIAGONAL portion. The example below better
3639    illustrates this concept.
3640 
3641    For a square global matrix we define each processor's diagonal portion
3642    to be its local rows and the corresponding columns (a square submatrix);
3643    each processor's off-diagonal portion encompasses the remainder of the
3644    local matrix (a rectangular submatrix).
3645 
3646    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3647 
3648    When calling this routine with a single process communicator, a matrix of
3649    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3650    type of communicator, use the construction mechanism:
3651      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3652 
3653    By default, this format uses inodes (identical nodes) when possible.
3654    We search for consecutive rows with the same nonzero structure, thereby
3655    reusing matrix information to achieve increased efficiency.
3656 
3657    Options Database Keys:
3658 +  -mat_no_inode  - Do not use inodes
3659 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3660 -  -mat_aij_oneindex - Internally use indexing starting at 1
3661         rather than 0.  Note that when calling MatSetValues(),
3662         the user still MUST index entries starting at 0!
3663 
3664 
3665    Example usage:
3666 
3667    Consider the following 8x8 matrix with 34 non-zero values, that is
3668    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3669    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3670    as follows:
3671 
3672 .vb
3673             1  2  0  |  0  3  0  |  0  4
3674     Proc0   0  5  6  |  7  0  0  |  8  0
3675             9  0 10  | 11  0  0  | 12  0
3676     -------------------------------------
3677            13  0 14  | 15 16 17  |  0  0
3678     Proc1   0 18  0  | 19 20 21  |  0  0
3679             0  0  0  | 22 23  0  | 24  0
3680     -------------------------------------
3681     Proc2  25 26 27  |  0  0 28  | 29  0
3682            30  0  0  | 31 32 33  |  0 34
3683 .ve
3684 
3685    This can be represented as a collection of submatrices as:
3686 
3687 .vb
3688       A B C
3689       D E F
3690       G H I
3691 .ve
3692 
3693    Where the submatrices A,B,C are owned by proc0, D,E,F are
3694    owned by proc1, G,H,I are owned by proc2.
3695 
3696    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3697    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3698    The 'M','N' parameters are 8,8, and have the same values on all procs.
3699 
3700    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3701    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3702    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3703    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3704    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3705    matrix, ans [DF] as another SeqAIJ matrix.
3706 
3707    When d_nz, o_nz parameters are specified, d_nz storage elements are
3708    allocated for every row of the local diagonal submatrix, and o_nz
3709    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3710    One way to choose d_nz and o_nz is to use the max nonzerors per local
3711    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3712    In this case, the values of d_nz,o_nz are:
3713 .vb
3714      proc0 : dnz = 2, o_nz = 2
3715      proc1 : dnz = 3, o_nz = 2
3716      proc2 : dnz = 1, o_nz = 4
3717 .ve
3718    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3719    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3720    for proc3. i.e we are using 12+15+10=37 storage locations to store
3721    34 values.
3722 
3723    When d_nnz, o_nnz parameters are specified, the storage is specified
3724    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3725    In the above case the values for d_nnz,o_nnz are:
3726 .vb
3727      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3728      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3729      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3730 .ve
3731    Here the space allocated is sum of all the above values i.e 34, and
3732    hence pre-allocation is perfect.
3733 
3734    Level: intermediate
3735 
3736 .keywords: matrix, aij, compressed row, sparse, parallel
3737 
3738 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3739           MPIAIJ, MatCreateMPIAIJWithArrays()
3740 @*/
3741 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3742 {
3743   PetscErrorCode ierr;
3744   PetscMPIInt    size;
3745 
3746   PetscFunctionBegin;
3747   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3748   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3749   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3750   if (size > 1) {
3751     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3752     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3753   } else {
3754     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3755     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3756   }
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3762 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3763 {
3764   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3765 
3766   PetscFunctionBegin;
3767   *Ad     = a->A;
3768   *Ao     = a->B;
3769   *colmap = a->garray;
3770   PetscFunctionReturn(0);
3771 }
3772 
3773 #undef __FUNCT__
3774 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3775 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3776 {
3777   PetscErrorCode ierr;
3778   PetscInt       i;
3779   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3780 
3781   PetscFunctionBegin;
3782   if (coloring->ctype == IS_COLORING_GLOBAL) {
3783     ISColoringValue *allcolors,*colors;
3784     ISColoring      ocoloring;
3785 
3786     /* set coloring for diagonal portion */
3787     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3788 
3789     /* set coloring for off-diagonal portion */
3790     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3791     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3792     for (i=0; i<a->B->cmap->n; i++) {
3793       colors[i] = allcolors[a->garray[i]];
3794     }
3795     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3796     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3797     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3798     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3799   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3800     ISColoringValue *colors;
3801     PetscInt        *larray;
3802     ISColoring      ocoloring;
3803 
3804     /* set coloring for diagonal portion */
3805     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3806     for (i=0; i<a->A->cmap->n; i++) {
3807       larray[i] = i + A->cmap->rstart;
3808     }
3809     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3810     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3811     for (i=0; i<a->A->cmap->n; i++) {
3812       colors[i] = coloring->colors[larray[i]];
3813     }
3814     ierr = PetscFree(larray);CHKERRQ(ierr);
3815     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3816     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3817     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3818 
3819     /* set coloring for off-diagonal portion */
3820     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3821     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3822     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3823     for (i=0; i<a->B->cmap->n; i++) {
3824       colors[i] = coloring->colors[larray[i]];
3825     }
3826     ierr = PetscFree(larray);CHKERRQ(ierr);
3827     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3828     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3829     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3830   } else {
3831     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3832   }
3833 
3834   PetscFunctionReturn(0);
3835 }
3836 
3837 #if defined(PETSC_HAVE_ADIC)
3838 #undef __FUNCT__
3839 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3840 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3841 {
3842   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3843   PetscErrorCode ierr;
3844 
3845   PetscFunctionBegin;
3846   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3847   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3848   PetscFunctionReturn(0);
3849 }
3850 #endif
3851 
3852 #undef __FUNCT__
3853 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3854 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3855 {
3856   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3857   PetscErrorCode ierr;
3858 
3859   PetscFunctionBegin;
3860   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3861   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3862   PetscFunctionReturn(0);
3863 }
3864 
3865 #undef __FUNCT__
3866 #define __FUNCT__ "MatMerge"
3867 /*@
3868       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3869                  matrices from each processor
3870 
3871     Collective on MPI_Comm
3872 
3873    Input Parameters:
3874 +    comm - the communicators the parallel matrix will live on
3875 .    inmat - the input sequential matrices
3876 .    n - number of local columns (or PETSC_DECIDE)
3877 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3878 
3879    Output Parameter:
3880 .    outmat - the parallel matrix generated
3881 
3882     Level: advanced
3883 
3884    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3885 
3886 @*/
3887 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3888 {
3889   PetscErrorCode ierr;
3890   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3891   PetscInt       *indx;
3892   PetscScalar    *values;
3893 
3894   PetscFunctionBegin;
3895   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3896   if (scall == MAT_INITIAL_MATRIX){
3897     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3898     if (n == PETSC_DECIDE){
3899       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3900     }
3901     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3902     rstart -= m;
3903 
3904     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3905     for (i=0;i<m;i++) {
3906       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3907       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3908       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3909     }
3910     /* This routine will ONLY return MPIAIJ type matrix */
3911     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3912     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3913     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3914     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3915     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3916 
3917   } else if (scall == MAT_REUSE_MATRIX){
3918     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3919   } else {
3920     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3921   }
3922 
3923   for (i=0;i<m;i++) {
3924     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3925     Ii    = i + rstart;
3926     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3927     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3928   }
3929   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3930   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3931   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3932 
3933   PetscFunctionReturn(0);
3934 }
3935 
3936 #undef __FUNCT__
3937 #define __FUNCT__ "MatFileSplit"
3938 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3939 {
3940   PetscErrorCode    ierr;
3941   PetscMPIInt       rank;
3942   PetscInt          m,N,i,rstart,nnz;
3943   size_t            len;
3944   const PetscInt    *indx;
3945   PetscViewer       out;
3946   char              *name;
3947   Mat               B;
3948   const PetscScalar *values;
3949 
3950   PetscFunctionBegin;
3951   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3952   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3953   /* Should this be the type of the diagonal block of A? */
3954   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3955   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3956   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3957   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3958   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3959   for (i=0;i<m;i++) {
3960     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3961     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3962     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3963   }
3964   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3965   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3966 
3967   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3968   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3969   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3970   sprintf(name,"%s.%d",outfile,rank);
3971   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3972   ierr = PetscFree(name);
3973   ierr = MatView(B,out);CHKERRQ(ierr);
3974   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3975   ierr = MatDestroy(B);CHKERRQ(ierr);
3976   PetscFunctionReturn(0);
3977 }
3978 
3979 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3980 #undef __FUNCT__
3981 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3982 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3983 {
3984   PetscErrorCode       ierr;
3985   Mat_Merge_SeqsToMPI  *merge;
3986   PetscContainer       container;
3987 
3988   PetscFunctionBegin;
3989   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3990   if (container) {
3991     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4002     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4003 
4004     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4005     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4006   }
4007   ierr = PetscFree(merge);CHKERRQ(ierr);
4008 
4009   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4010   PetscFunctionReturn(0);
4011 }
4012 
4013 #include "../src/mat/utils/freespace.h"
4014 #include "petscbt.h"
4015 
4016 #undef __FUNCT__
4017 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4018 /*@C
4019       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4020                  matrices from each processor
4021 
4022     Collective on MPI_Comm
4023 
4024    Input Parameters:
4025 +    comm - the communicators the parallel matrix will live on
4026 .    seqmat - the input sequential matrices
4027 .    m - number of local rows (or PETSC_DECIDE)
4028 .    n - number of local columns (or PETSC_DECIDE)
4029 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4030 
4031    Output Parameter:
4032 .    mpimat - the parallel matrix generated
4033 
4034     Level: advanced
4035 
4036    Notes:
4037      The dimensions of the sequential matrix in each processor MUST be the same.
4038      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4039      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4040 @*/
4041 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4042 {
4043   PetscErrorCode       ierr;
4044   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4045   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4046   PetscMPIInt          size,rank,taga,*len_s;
4047   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4048   PetscInt             proc,m;
4049   PetscInt             **buf_ri,**buf_rj;
4050   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4051   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4052   MPI_Request          *s_waits,*r_waits;
4053   MPI_Status           *status;
4054   MatScalar            *aa=a->a;
4055   MatScalar            **abuf_r,*ba_i;
4056   Mat_Merge_SeqsToMPI  *merge;
4057   PetscContainer       container;
4058 
4059   PetscFunctionBegin;
4060   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4061 
4062   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4063   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4064 
4065   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4066   if (container) {
4067     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4068   }
4069   bi     = merge->bi;
4070   bj     = merge->bj;
4071   buf_ri = merge->buf_ri;
4072   buf_rj = merge->buf_rj;
4073 
4074   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4075   owners = merge->rowmap->range;
4076   len_s  = merge->len_s;
4077 
4078   /* send and recv matrix values */
4079   /*-----------------------------*/
4080   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4081   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4082 
4083   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4084   for (proc=0,k=0; proc<size; proc++){
4085     if (!len_s[proc]) continue;
4086     i = owners[proc];
4087     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4088     k++;
4089   }
4090 
4091   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4092   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4093   ierr = PetscFree(status);CHKERRQ(ierr);
4094 
4095   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4096   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4097 
4098   /* insert mat values of mpimat */
4099   /*----------------------------*/
4100   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4101   ierr = PetscMalloc3(merge->nrecv,PetscInt**,&buf_ri_k,merge->nrecv,PetscInt**,&nextrow,merge->nrecv,PetscInt**,&nextai);CHKERRQ(ierr);
4102 
4103   for (k=0; k<merge->nrecv; k++){
4104     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4105     nrows = *(buf_ri_k[k]);
4106     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4107     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4108   }
4109 
4110   /* set values of ba */
4111   m = merge->rowmap->n;
4112   for (i=0; i<m; i++) {
4113     arow = owners[rank] + i;
4114     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4115     bnzi = bi[i+1] - bi[i];
4116     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4117 
4118     /* add local non-zero vals of this proc's seqmat into ba */
4119     anzi = ai[arow+1] - ai[arow];
4120     aj   = a->j + ai[arow];
4121     aa   = a->a + ai[arow];
4122     nextaj = 0;
4123     for (j=0; nextaj<anzi; j++){
4124       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4125         ba_i[j] += aa[nextaj++];
4126       }
4127     }
4128 
4129     /* add received vals into ba */
4130     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4131       /* i-th row */
4132       if (i == *nextrow[k]) {
4133         anzi = *(nextai[k]+1) - *nextai[k];
4134         aj   = buf_rj[k] + *(nextai[k]);
4135         aa   = abuf_r[k] + *(nextai[k]);
4136         nextaj = 0;
4137         for (j=0; nextaj<anzi; j++){
4138           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4139             ba_i[j] += aa[nextaj++];
4140           }
4141         }
4142         nextrow[k]++; nextai[k]++;
4143       }
4144     }
4145     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4146   }
4147   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4148   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4149 
4150   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4151   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4152   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4153   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4154   PetscFunctionReturn(0);
4155 }
4156 
4157 #undef __FUNCT__
4158 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4159 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4160 {
4161   PetscErrorCode       ierr;
4162   Mat                  B_mpi;
4163   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4164   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4165   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4166   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4167   PetscInt             len,proc,*dnz,*onz;
4168   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4169   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4170   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4171   MPI_Status           *status;
4172   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4173   PetscBT              lnkbt;
4174   Mat_Merge_SeqsToMPI  *merge;
4175   PetscContainer       container;
4176 
4177   PetscFunctionBegin;
4178   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4179 
4180   /* make sure it is a PETSc comm */
4181   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4182   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4183   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4184 
4185   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4186   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4187 
4188   /* determine row ownership */
4189   /*---------------------------------------------------------*/
4190   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4191   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4192   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4193   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4194   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4195   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4196   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4197 
4198   m      = merge->rowmap->n;
4199   M      = merge->rowmap->N;
4200   owners = merge->rowmap->range;
4201 
4202   /* determine the number of messages to send, their lengths */
4203   /*---------------------------------------------------------*/
4204   len_s  = merge->len_s;
4205 
4206   len = 0;  /* length of buf_si[] */
4207   merge->nsend = 0;
4208   for (proc=0; proc<size; proc++){
4209     len_si[proc] = 0;
4210     if (proc == rank){
4211       len_s[proc] = 0;
4212     } else {
4213       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4214       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4215     }
4216     if (len_s[proc]) {
4217       merge->nsend++;
4218       nrows = 0;
4219       for (i=owners[proc]; i<owners[proc+1]; i++){
4220         if (ai[i+1] > ai[i]) nrows++;
4221       }
4222       len_si[proc] = 2*(nrows+1);
4223       len += len_si[proc];
4224     }
4225   }
4226 
4227   /* determine the number and length of messages to receive for ij-structure */
4228   /*-------------------------------------------------------------------------*/
4229   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4230   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4231 
4232   /* post the Irecv of j-structure */
4233   /*-------------------------------*/
4234   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4235   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4236 
4237   /* post the Isend of j-structure */
4238   /*--------------------------------*/
4239   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4240 
4241   for (proc=0, k=0; proc<size; proc++){
4242     if (!len_s[proc]) continue;
4243     i = owners[proc];
4244     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4245     k++;
4246   }
4247 
4248   /* receives and sends of j-structure are complete */
4249   /*------------------------------------------------*/
4250   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4251   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4252 
4253   /* send and recv i-structure */
4254   /*---------------------------*/
4255   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4256   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4257 
4258   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4259   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4260   for (proc=0,k=0; proc<size; proc++){
4261     if (!len_s[proc]) continue;
4262     /* form outgoing message for i-structure:
4263          buf_si[0]:                 nrows to be sent
4264                [1:nrows]:           row index (global)
4265                [nrows+1:2*nrows+1]: i-structure index
4266     */
4267     /*-------------------------------------------*/
4268     nrows = len_si[proc]/2 - 1;
4269     buf_si_i    = buf_si + nrows+1;
4270     buf_si[0]   = nrows;
4271     buf_si_i[0] = 0;
4272     nrows = 0;
4273     for (i=owners[proc]; i<owners[proc+1]; i++){
4274       anzi = ai[i+1] - ai[i];
4275       if (anzi) {
4276         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4277         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4278         nrows++;
4279       }
4280     }
4281     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4282     k++;
4283     buf_si += len_si[proc];
4284   }
4285 
4286   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4287   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4288 
4289   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4290   for (i=0; i<merge->nrecv; i++){
4291     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4292   }
4293 
4294   ierr = PetscFree(len_si);CHKERRQ(ierr);
4295   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4296   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4297   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4298   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4299   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4300   ierr = PetscFree(status);CHKERRQ(ierr);
4301 
4302   /* compute a local seq matrix in each processor */
4303   /*----------------------------------------------*/
4304   /* allocate bi array and free space for accumulating nonzero column info */
4305   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4306   bi[0] = 0;
4307 
4308   /* create and initialize a linked list */
4309   nlnk = N+1;
4310   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4311 
4312   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4313   len = 0;
4314   len  = ai[owners[rank+1]] - ai[owners[rank]];
4315   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4316   current_space = free_space;
4317 
4318   /* determine symbolic info for each local row */
4319   ierr = PetscMalloc3(merge->nrecv,PetscInt**,&buf_ri_k,merge->nrecv,PetscInt**,&nextrow,merge->nrecv,PetscInt**,&nextai);CHKERRQ(ierr);
4320 
4321   for (k=0; k<merge->nrecv; k++){
4322     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4323     nrows = *buf_ri_k[k];
4324     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4325     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4326   }
4327 
4328   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4329   len = 0;
4330   for (i=0;i<m;i++) {
4331     bnzi   = 0;
4332     /* add local non-zero cols of this proc's seqmat into lnk */
4333     arow   = owners[rank] + i;
4334     anzi   = ai[arow+1] - ai[arow];
4335     aj     = a->j + ai[arow];
4336     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4337     bnzi += nlnk;
4338     /* add received col data into lnk */
4339     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4340       if (i == *nextrow[k]) { /* i-th row */
4341         anzi = *(nextai[k]+1) - *nextai[k];
4342         aj   = buf_rj[k] + *nextai[k];
4343         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4344         bnzi += nlnk;
4345         nextrow[k]++; nextai[k]++;
4346       }
4347     }
4348     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4349 
4350     /* if free space is not available, make more free space */
4351     if (current_space->local_remaining<bnzi) {
4352       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4353       nspacedouble++;
4354     }
4355     /* copy data into free space, then initialize lnk */
4356     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4357     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4358 
4359     current_space->array           += bnzi;
4360     current_space->local_used      += bnzi;
4361     current_space->local_remaining -= bnzi;
4362 
4363     bi[i+1] = bi[i] + bnzi;
4364   }
4365 
4366   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4367 
4368   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4369   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4370   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4371 
4372   /* create symbolic parallel matrix B_mpi */
4373   /*---------------------------------------*/
4374   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4375   if (n==PETSC_DECIDE) {
4376     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4377   } else {
4378     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4379   }
4380   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4381   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4382   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4383 
4384   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4385   B_mpi->assembled     = PETSC_FALSE;
4386   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4387   merge->bi            = bi;
4388   merge->bj            = bj;
4389   merge->buf_ri        = buf_ri;
4390   merge->buf_rj        = buf_rj;
4391   merge->coi           = PETSC_NULL;
4392   merge->coj           = PETSC_NULL;
4393   merge->owners_co     = PETSC_NULL;
4394 
4395   /* attach the supporting struct to B_mpi for reuse */
4396   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4397   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4398   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4399   *mpimat = B_mpi;
4400 
4401   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4402   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4403   PetscFunctionReturn(0);
4404 }
4405 
4406 #undef __FUNCT__
4407 #define __FUNCT__ "MatMerge_SeqsToMPI"
4408 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4409 {
4410   PetscErrorCode   ierr;
4411 
4412   PetscFunctionBegin;
4413   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4414   if (scall == MAT_INITIAL_MATRIX){
4415     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4416   }
4417   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4418   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4419   PetscFunctionReturn(0);
4420 }
4421 
4422 #undef __FUNCT__
4423 #define __FUNCT__ "MatGetLocalMat"
4424 /*@
4425      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4426 
4427     Not Collective
4428 
4429    Input Parameters:
4430 +    A - the matrix
4431 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4432 
4433    Output Parameter:
4434 .    A_loc - the local sequential matrix generated
4435 
4436     Level: developer
4437 
4438 @*/
4439 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4440 {
4441   PetscErrorCode  ierr;
4442   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4443   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4444   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4445   MatScalar       *aa=a->a,*ba=b->a,*cam;
4446   PetscScalar     *ca;
4447   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4448   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4449 
4450   PetscFunctionBegin;
4451   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4452   if (scall == MAT_INITIAL_MATRIX){
4453     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4454     ci[0] = 0;
4455     for (i=0; i<am; i++){
4456       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4457     }
4458     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4459     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4460     k = 0;
4461     for (i=0; i<am; i++) {
4462       ncols_o = bi[i+1] - bi[i];
4463       ncols_d = ai[i+1] - ai[i];
4464       /* off-diagonal portion of A */
4465       for (jo=0; jo<ncols_o; jo++) {
4466         col = cmap[*bj];
4467         if (col >= cstart) break;
4468         cj[k]   = col; bj++;
4469         ca[k++] = *ba++;
4470       }
4471       /* diagonal portion of A */
4472       for (j=0; j<ncols_d; j++) {
4473         cj[k]   = cstart + *aj++;
4474         ca[k++] = *aa++;
4475       }
4476       /* off-diagonal portion of A */
4477       for (j=jo; j<ncols_o; j++) {
4478         cj[k]   = cmap[*bj++];
4479         ca[k++] = *ba++;
4480       }
4481     }
4482     /* put together the new matrix */
4483     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4484     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4485     /* Since these are PETSc arrays, change flags to free them as necessary. */
4486     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4487     mat->free_a  = PETSC_TRUE;
4488     mat->free_ij = PETSC_TRUE;
4489     mat->nonew   = 0;
4490   } else if (scall == MAT_REUSE_MATRIX){
4491     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4492     ci = mat->i; cj = mat->j; cam = mat->a;
4493     for (i=0; i<am; i++) {
4494       /* off-diagonal portion of A */
4495       ncols_o = bi[i+1] - bi[i];
4496       for (jo=0; jo<ncols_o; jo++) {
4497         col = cmap[*bj];
4498         if (col >= cstart) break;
4499         *cam++ = *ba++; bj++;
4500       }
4501       /* diagonal portion of A */
4502       ncols_d = ai[i+1] - ai[i];
4503       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4504       /* off-diagonal portion of A */
4505       for (j=jo; j<ncols_o; j++) {
4506         *cam++ = *ba++; bj++;
4507       }
4508     }
4509   } else {
4510     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4511   }
4512 
4513   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4514   PetscFunctionReturn(0);
4515 }
4516 
4517 #undef __FUNCT__
4518 #define __FUNCT__ "MatGetLocalMatCondensed"
4519 /*@C
4520      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4521 
4522     Not Collective
4523 
4524    Input Parameters:
4525 +    A - the matrix
4526 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4527 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4528 
4529    Output Parameter:
4530 .    A_loc - the local sequential matrix generated
4531 
4532     Level: developer
4533 
4534 @*/
4535 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4536 {
4537   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4538   PetscErrorCode    ierr;
4539   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4540   IS                isrowa,iscola;
4541   Mat               *aloc;
4542 
4543   PetscFunctionBegin;
4544   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4545   if (!row){
4546     start = A->rmap->rstart; end = A->rmap->rend;
4547     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4548   } else {
4549     isrowa = *row;
4550   }
4551   if (!col){
4552     start = A->cmap->rstart;
4553     cmap  = a->garray;
4554     nzA   = a->A->cmap->n;
4555     nzB   = a->B->cmap->n;
4556     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4557     ncols = 0;
4558     for (i=0; i<nzB; i++) {
4559       if (cmap[i] < start) idx[ncols++] = cmap[i];
4560       else break;
4561     }
4562     imark = i;
4563     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4564     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4565     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4566     ierr = PetscFree(idx);CHKERRQ(ierr);
4567   } else {
4568     iscola = *col;
4569   }
4570   if (scall != MAT_INITIAL_MATRIX){
4571     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4572     aloc[0] = *A_loc;
4573   }
4574   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4575   *A_loc = aloc[0];
4576   ierr = PetscFree(aloc);CHKERRQ(ierr);
4577   if (!row){
4578     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4579   }
4580   if (!col){
4581     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4582   }
4583   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4584   PetscFunctionReturn(0);
4585 }
4586 
4587 #undef __FUNCT__
4588 #define __FUNCT__ "MatGetBrowsOfAcols"
4589 /*@C
4590     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4591 
4592     Collective on Mat
4593 
4594    Input Parameters:
4595 +    A,B - the matrices in mpiaij format
4596 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4597 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4598 
4599    Output Parameter:
4600 +    rowb, colb - index sets of rows and columns of B to extract
4601 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4602 -    B_seq - the sequential matrix generated
4603 
4604     Level: developer
4605 
4606 @*/
4607 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4608 {
4609   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4610   PetscErrorCode    ierr;
4611   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4612   IS                isrowb,iscolb;
4613   Mat               *bseq;
4614 
4615   PetscFunctionBegin;
4616   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4617     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4618   }
4619   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4620 
4621   if (scall == MAT_INITIAL_MATRIX){
4622     start = A->cmap->rstart;
4623     cmap  = a->garray;
4624     nzA   = a->A->cmap->n;
4625     nzB   = a->B->cmap->n;
4626     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4627     ncols = 0;
4628     for (i=0; i<nzB; i++) {  /* row < local row index */
4629       if (cmap[i] < start) idx[ncols++] = cmap[i];
4630       else break;
4631     }
4632     imark = i;
4633     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4634     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4635     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4636     ierr = PetscFree(idx);CHKERRQ(ierr);
4637     *brstart = imark;
4638     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4639   } else {
4640     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4641     isrowb = *rowb; iscolb = *colb;
4642     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4643     bseq[0] = *B_seq;
4644   }
4645   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4646   *B_seq = bseq[0];
4647   ierr = PetscFree(bseq);CHKERRQ(ierr);
4648   if (!rowb){
4649     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4650   } else {
4651     *rowb = isrowb;
4652   }
4653   if (!colb){
4654     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4655   } else {
4656     *colb = iscolb;
4657   }
4658   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4659   PetscFunctionReturn(0);
4660 }
4661 
4662 #undef __FUNCT__
4663 #define __FUNCT__ "MatGetBrowsOfAoCols"
4664 /*@C
4665     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4666     of the OFF-DIAGONAL portion of local A
4667 
4668     Collective on Mat
4669 
4670    Input Parameters:
4671 +    A,B - the matrices in mpiaij format
4672 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4673 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4674 .    startsj_r - similar to startsj for receives
4675 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4676 
4677    Output Parameter:
4678 +    B_oth - the sequential matrix generated
4679 
4680     Level: developer
4681 
4682 @*/
4683 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4684 {
4685   VecScatter_MPI_General *gen_to,*gen_from;
4686   PetscErrorCode         ierr;
4687   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4688   Mat_SeqAIJ             *b_oth;
4689   VecScatter             ctx=a->Mvctx;
4690   MPI_Comm               comm=((PetscObject)ctx)->comm;
4691   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4692   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4693   PetscScalar            *rvalues,*svalues;
4694   MatScalar              *b_otha,*bufa,*bufA;
4695   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4696   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4697   MPI_Status             *sstatus,rstatus;
4698   PetscMPIInt            jj;
4699   PetscInt               *cols,sbs,rbs;
4700   PetscScalar            *vals;
4701 
4702   PetscFunctionBegin;
4703   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4704     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4705   }
4706   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4707   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4708 
4709   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4710   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4711   rvalues  = gen_from->values; /* holds the length of receiving row */
4712   svalues  = gen_to->values;   /* holds the length of sending row */
4713   nrecvs   = gen_from->n;
4714   nsends   = gen_to->n;
4715 
4716   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4717   srow     = gen_to->indices;   /* local row index to be sent */
4718   sstarts  = gen_to->starts;
4719   sprocs   = gen_to->procs;
4720   sstatus  = gen_to->sstatus;
4721   sbs      = gen_to->bs;
4722   rstarts  = gen_from->starts;
4723   rprocs   = gen_from->procs;
4724   rbs      = gen_from->bs;
4725 
4726   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4727   if (scall == MAT_INITIAL_MATRIX){
4728     /* i-array */
4729     /*---------*/
4730     /*  post receives */
4731     for (i=0; i<nrecvs; i++){
4732       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4733       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4734       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4735     }
4736 
4737     /* pack the outgoing message */
4738     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4739     sstartsj[0] = 0;  rstartsj[0] = 0;
4740     len = 0; /* total length of j or a array to be sent */
4741     k = 0;
4742     for (i=0; i<nsends; i++){
4743       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4744       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4745       for (j=0; j<nrows; j++) {
4746         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4747         for (l=0; l<sbs; l++){
4748           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4749           rowlen[j*sbs+l] = ncols;
4750           len += ncols;
4751           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4752         }
4753         k++;
4754       }
4755       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4756       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4757     }
4758     /* recvs and sends of i-array are completed */
4759     i = nrecvs;
4760     while (i--) {
4761       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4762     }
4763     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4764 
4765     /* allocate buffers for sending j and a arrays */
4766     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4767     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4768 
4769     /* create i-array of B_oth */
4770     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4771     b_othi[0] = 0;
4772     len = 0; /* total length of j or a array to be received */
4773     k = 0;
4774     for (i=0; i<nrecvs; i++){
4775       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4776       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4777       for (j=0; j<nrows; j++) {
4778         b_othi[k+1] = b_othi[k] + rowlen[j];
4779         len += rowlen[j]; k++;
4780       }
4781       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4782     }
4783 
4784     /* allocate space for j and a arrrays of B_oth */
4785     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4786     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4787 
4788     /* j-array */
4789     /*---------*/
4790     /*  post receives of j-array */
4791     for (i=0; i<nrecvs; i++){
4792       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4793       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4794     }
4795 
4796     /* pack the outgoing message j-array */
4797     k = 0;
4798     for (i=0; i<nsends; i++){
4799       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4800       bufJ = bufj+sstartsj[i];
4801       for (j=0; j<nrows; j++) {
4802         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4803         for (ll=0; ll<sbs; ll++){
4804           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4805           for (l=0; l<ncols; l++){
4806             *bufJ++ = cols[l];
4807           }
4808           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4809         }
4810       }
4811       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4812     }
4813 
4814     /* recvs and sends of j-array are completed */
4815     i = nrecvs;
4816     while (i--) {
4817       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4818     }
4819     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4820   } else if (scall == MAT_REUSE_MATRIX){
4821     sstartsj = *startsj;
4822     rstartsj = *startsj_r;
4823     bufa     = *bufa_ptr;
4824     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4825     b_otha   = b_oth->a;
4826   } else {
4827     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4828   }
4829 
4830   /* a-array */
4831   /*---------*/
4832   /*  post receives of a-array */
4833   for (i=0; i<nrecvs; i++){
4834     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4835     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4836   }
4837 
4838   /* pack the outgoing message a-array */
4839   k = 0;
4840   for (i=0; i<nsends; i++){
4841     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4842     bufA = bufa+sstartsj[i];
4843     for (j=0; j<nrows; j++) {
4844       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4845       for (ll=0; ll<sbs; ll++){
4846         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4847         for (l=0; l<ncols; l++){
4848           *bufA++ = vals[l];
4849         }
4850         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4851       }
4852     }
4853     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4854   }
4855   /* recvs and sends of a-array are completed */
4856   i = nrecvs;
4857   while (i--) {
4858     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4859   }
4860   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4861   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4862 
4863   if (scall == MAT_INITIAL_MATRIX){
4864     /* put together the new matrix */
4865     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4866 
4867     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4868     /* Since these are PETSc arrays, change flags to free them as necessary. */
4869     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4870     b_oth->free_a  = PETSC_TRUE;
4871     b_oth->free_ij = PETSC_TRUE;
4872     b_oth->nonew   = 0;
4873 
4874     ierr = PetscFree(bufj);CHKERRQ(ierr);
4875     if (!startsj || !bufa_ptr){
4876       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4877       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4878     } else {
4879       *startsj   = sstartsj;
4880       *startsj_r = rstartsj;
4881       *bufa_ptr  = bufa;
4882     }
4883   }
4884   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4885   PetscFunctionReturn(0);
4886 }
4887 
4888 #undef __FUNCT__
4889 #define __FUNCT__ "MatGetCommunicationStructs"
4890 /*@C
4891   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4892 
4893   Not Collective
4894 
4895   Input Parameters:
4896 . A - The matrix in mpiaij format
4897 
4898   Output Parameter:
4899 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4900 . colmap - A map from global column index to local index into lvec
4901 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4902 
4903   Level: developer
4904 
4905 @*/
4906 #if defined (PETSC_USE_CTABLE)
4907 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4908 #else
4909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4910 #endif
4911 {
4912   Mat_MPIAIJ *a;
4913 
4914   PetscFunctionBegin;
4915   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4916   PetscValidPointer(lvec, 2)
4917   PetscValidPointer(colmap, 3)
4918   PetscValidPointer(multScatter, 4)
4919   a = (Mat_MPIAIJ *) A->data;
4920   if (lvec) *lvec = a->lvec;
4921   if (colmap) *colmap = a->colmap;
4922   if (multScatter) *multScatter = a->Mvctx;
4923   PetscFunctionReturn(0);
4924 }
4925 
4926 EXTERN_C_BEGIN
4927 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4928 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4929 EXTERN_C_END
4930 
4931 #undef __FUNCT__
4932 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4933 /*
4934     Computes (B'*A')' since computing B*A directly is untenable
4935 
4936                n                       p                          p
4937         (              )       (              )         (                  )
4938       m (      A       )  *  n (       B      )   =   m (         C        )
4939         (              )       (              )         (                  )
4940 
4941 */
4942 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4943 {
4944   PetscErrorCode     ierr;
4945   Mat                At,Bt,Ct;
4946 
4947   PetscFunctionBegin;
4948   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4949   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4950   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4951   ierr = MatDestroy(At);CHKERRQ(ierr);
4952   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4953   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4954   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4955   PetscFunctionReturn(0);
4956 }
4957 
4958 #undef __FUNCT__
4959 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4960 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4961 {
4962   PetscErrorCode ierr;
4963   PetscInt       m=A->rmap->n,n=B->cmap->n;
4964   Mat            Cmat;
4965 
4966   PetscFunctionBegin;
4967   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4968   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4969   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4970   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4971   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4972   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4973   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4974   *C   = Cmat;
4975   PetscFunctionReturn(0);
4976 }
4977 
4978 /* ----------------------------------------------------------------*/
4979 #undef __FUNCT__
4980 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4981 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4982 {
4983   PetscErrorCode ierr;
4984 
4985   PetscFunctionBegin;
4986   if (scall == MAT_INITIAL_MATRIX){
4987     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4988   }
4989   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4990   PetscFunctionReturn(0);
4991 }
4992 
4993 EXTERN_C_BEGIN
4994 #if defined(PETSC_HAVE_MUMPS)
4995 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4996 #endif
4997 #if defined(PETSC_HAVE_PASTIX)
4998 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4999 #endif
5000 #if defined(PETSC_HAVE_SUPERLU_DIST)
5001 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5002 #endif
5003 #if defined(PETSC_HAVE_SPOOLES)
5004 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5005 #endif
5006 EXTERN_C_END
5007 
5008 /*MC
5009    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5010 
5011    Options Database Keys:
5012 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5013 
5014   Level: beginner
5015 
5016 .seealso: MatCreateMPIAIJ()
5017 M*/
5018 
5019 EXTERN_C_BEGIN
5020 #undef __FUNCT__
5021 #define __FUNCT__ "MatCreate_MPIAIJ"
5022 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5023 {
5024   Mat_MPIAIJ     *b;
5025   PetscErrorCode ierr;
5026   PetscMPIInt    size;
5027 
5028   PetscFunctionBegin;
5029   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5030 
5031   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5032   B->data         = (void*)b;
5033   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5034   B->rmap->bs     = 1;
5035   B->assembled    = PETSC_FALSE;
5036   B->mapping      = 0;
5037 
5038   B->insertmode   = NOT_SET_VALUES;
5039   b->size         = size;
5040   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5041 
5042   /* build cache for off array entries formed */
5043   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5044   b->donotstash  = PETSC_FALSE;
5045   b->colmap      = 0;
5046   b->garray      = 0;
5047   b->roworiented = PETSC_TRUE;
5048 
5049   /* stuff used for matrix vector multiply */
5050   b->lvec      = PETSC_NULL;
5051   b->Mvctx     = PETSC_NULL;
5052 
5053   /* stuff for MatGetRow() */
5054   b->rowindices   = 0;
5055   b->rowvalues    = 0;
5056   b->getrowactive = PETSC_FALSE;
5057 
5058 #if defined(PETSC_HAVE_SPOOLES)
5059   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5060                                      "MatGetFactor_mpiaij_spooles",
5061                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5062 #endif
5063 #if defined(PETSC_HAVE_MUMPS)
5064   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5065                                      "MatGetFactor_mpiaij_mumps",
5066                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5067 #endif
5068 #if defined(PETSC_HAVE_PASTIX)
5069   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5070 					   "MatGetFactor_mpiaij_pastix",
5071 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5072 #endif
5073 #if defined(PETSC_HAVE_SUPERLU_DIST)
5074   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5075                                      "MatGetFactor_mpiaij_superlu_dist",
5076                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5077 #endif
5078   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5079                                      "MatStoreValues_MPIAIJ",
5080                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5081   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5082                                      "MatRetrieveValues_MPIAIJ",
5083                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5084   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5085 				     "MatGetDiagonalBlock_MPIAIJ",
5086                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5087   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5088 				     "MatIsTranspose_MPIAIJ",
5089 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5090   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5091 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5092 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5093   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5094 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5095 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5096   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5097 				     "MatDiagonalScaleLocal_MPIAIJ",
5098 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5099   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5100                                      "MatConvert_MPIAIJ_MPICSRPERM",
5101                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5102   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5103                                      "MatConvert_MPIAIJ_MPICRL",
5104                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5105   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5106                                      "MatConvert_MPIAIJ_MPISBAIJ",
5107                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5108   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5109                                      "MatMatMult_MPIDense_MPIAIJ",
5110                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5111   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5112                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5113                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5114   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5115                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5116                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5117   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5118   PetscFunctionReturn(0);
5119 }
5120 EXTERN_C_END
5121 
5122 #undef __FUNCT__
5123 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5124 /*@
5125      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5126          and "off-diagonal" part of the matrix in CSR format.
5127 
5128    Collective on MPI_Comm
5129 
5130    Input Parameters:
5131 +  comm - MPI communicator
5132 .  m - number of local rows (Cannot be PETSC_DECIDE)
5133 .  n - This value should be the same as the local size used in creating the
5134        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5135        calculated if N is given) For square matrices n is almost always m.
5136 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5137 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5138 .   i - row indices for "diagonal" portion of matrix
5139 .   j - column indices
5140 .   a - matrix values
5141 .   oi - row indices for "off-diagonal" portion of matrix
5142 .   oj - column indices
5143 -   oa - matrix values
5144 
5145    Output Parameter:
5146 .   mat - the matrix
5147 
5148    Level: advanced
5149 
5150    Notes:
5151        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5152 
5153        The i and j indices are 0 based
5154 
5155        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5156 
5157        This sets local rows and cannot be used to set off-processor values.
5158 
5159        You cannot later use MatSetValues() to change values in this matrix.
5160 
5161 .keywords: matrix, aij, compressed row, sparse, parallel
5162 
5163 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5164           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5165 @*/
5166 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5167 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5168 {
5169   PetscErrorCode ierr;
5170   Mat_MPIAIJ     *maij;
5171 
5172  PetscFunctionBegin;
5173   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5174   if (i[0]) {
5175     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5176   }
5177   if (oi[0]) {
5178     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5179   }
5180   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5181   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5182   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5183   maij = (Mat_MPIAIJ*) (*mat)->data;
5184   maij->donotstash     = PETSC_TRUE;
5185   (*mat)->preallocated = PETSC_TRUE;
5186 
5187   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5188   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5189   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5190   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5191 
5192   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5193   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5194 
5195   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5196   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5197   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5198   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5199 
5200   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5201   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5202   PetscFunctionReturn(0);
5203 }
5204 
5205 /*
5206     Special version for direct calls from Fortran
5207 */
5208 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5209 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5210 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5211 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5212 #endif
5213 
5214 /* Change these macros so can be used in void function */
5215 #undef CHKERRQ
5216 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5217 #undef SETERRQ2
5218 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5219 #undef SETERRQ
5220 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5221 
5222 EXTERN_C_BEGIN
5223 #undef __FUNCT__
5224 #define __FUNCT__ "matsetvaluesmpiaij_"
5225 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5226 {
5227   Mat             mat = *mmat;
5228   PetscInt        m = *mm, n = *mn;
5229   InsertMode      addv = *maddv;
5230   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5231   PetscScalar     value;
5232   PetscErrorCode  ierr;
5233 
5234   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5235   if (mat->insertmode == NOT_SET_VALUES) {
5236     mat->insertmode = addv;
5237   }
5238 #if defined(PETSC_USE_DEBUG)
5239   else if (mat->insertmode != addv) {
5240     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5241   }
5242 #endif
5243   {
5244   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5245   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5246   PetscTruth      roworiented = aij->roworiented;
5247 
5248   /* Some Variables required in the macro */
5249   Mat             A = aij->A;
5250   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5251   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5252   MatScalar       *aa = a->a;
5253   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5254   Mat             B = aij->B;
5255   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5256   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5257   MatScalar       *ba = b->a;
5258 
5259   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5260   PetscInt        nonew = a->nonew;
5261   MatScalar       *ap1,*ap2;
5262 
5263   PetscFunctionBegin;
5264   for (i=0; i<m; i++) {
5265     if (im[i] < 0) continue;
5266 #if defined(PETSC_USE_DEBUG)
5267     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5268 #endif
5269     if (im[i] >= rstart && im[i] < rend) {
5270       row      = im[i] - rstart;
5271       lastcol1 = -1;
5272       rp1      = aj + ai[row];
5273       ap1      = aa + ai[row];
5274       rmax1    = aimax[row];
5275       nrow1    = ailen[row];
5276       low1     = 0;
5277       high1    = nrow1;
5278       lastcol2 = -1;
5279       rp2      = bj + bi[row];
5280       ap2      = ba + bi[row];
5281       rmax2    = bimax[row];
5282       nrow2    = bilen[row];
5283       low2     = 0;
5284       high2    = nrow2;
5285 
5286       for (j=0; j<n; j++) {
5287         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5288         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5289         if (in[j] >= cstart && in[j] < cend){
5290           col = in[j] - cstart;
5291           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5292         } else if (in[j] < 0) continue;
5293 #if defined(PETSC_USE_DEBUG)
5294         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5295 #endif
5296         else {
5297           if (mat->was_assembled) {
5298             if (!aij->colmap) {
5299               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5300             }
5301 #if defined (PETSC_USE_CTABLE)
5302             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5303 	    col--;
5304 #else
5305             col = aij->colmap[in[j]] - 1;
5306 #endif
5307             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5308               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5309               col =  in[j];
5310               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5311               B = aij->B;
5312               b = (Mat_SeqAIJ*)B->data;
5313               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5314               rp2      = bj + bi[row];
5315               ap2      = ba + bi[row];
5316               rmax2    = bimax[row];
5317               nrow2    = bilen[row];
5318               low2     = 0;
5319               high2    = nrow2;
5320               bm       = aij->B->rmap->n;
5321               ba = b->a;
5322             }
5323           } else col = in[j];
5324           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5325         }
5326       }
5327     } else {
5328       if (!aij->donotstash) {
5329         if (roworiented) {
5330           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5331         } else {
5332           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5333         }
5334       }
5335     }
5336   }}
5337   PetscFunctionReturnVoid();
5338 }
5339 EXTERN_C_END
5340 
5341