xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision effcda254f9cbe71bfcedd0c91d3e94f137f61e2)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatDistribute_MPIAIJ"
7 /*
8     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
9     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
10 
11     Only for square matrices
12 */
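/*
    A minimal usage sketch (hypothetical caller code, shown only for illustration):
    gmat needs to be a valid SeqAIJ matrix only on process 0, and the local row
    counts m must sum to the global size of gmat across the communicator.

       Mat gmat = PETSC_NULL,dmat;
       if (!rank) { assemble the global SeqAIJ matrix gmat on process 0 }
       ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
       change the numerical values of gmat (same nonzero pattern), then update dmat with
       ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/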
13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
14 {
15   PetscMPIInt    rank,size;
16   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
17   PetscErrorCode ierr;
18   Mat            mat;
19   Mat_SeqAIJ     *gmata;
20   PetscMPIInt    tag;
21   MPI_Status     status;
22   PetscTruth     aij;
23   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
24 
25   PetscFunctionBegin;
26   CHKMEMQ;
27   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
28   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
29   if (!rank) {
30     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
31     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
32   }
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
35     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
36     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
37     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
38     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
39     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
40     rowners[0] = 0;
41     for (i=2; i<=size; i++) {
42       rowners[i] += rowners[i-1];
43     }
44     rstart = rowners[rank];
45     rend   = rowners[rank+1];
46     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
47     if (!rank) {
48       gmata = (Mat_SeqAIJ*) gmat->data;
49       /* send row lengths to the other processes */
50       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
51       for (i=1; i<size; i++) {
52 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
53       }
54       /* determine the number of diagonal and off-diagonal entries in each row */
55       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
56       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
57       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
58       jj = 0;
59       for (i=0; i<m; i++) {
60 	for (j=0; j<dlens[i]; j++) {
61           if (gmata->j[jj] < rstart) ld[i]++;
62 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
63 	  jj++;
64 	}
65       }
66       /* send column indices to other processes */
67       for (i=1; i<size; i++) {
68 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
69 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
70 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71       }
72 
73       /* send numerical values to other processes */
74       for (i=1; i<size; i++) {
75         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
76         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
77       }
78       gmataa = gmata->a;
79       gmataj = gmata->j;
80 
81     } else {
82       /* receive row lengths */
83       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
84       /* receive column indices */
85       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
86       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
87       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
88       /* determine the number of diagonal and off-diagonal entries in each row */
89       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
90       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
91       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
92       jj = 0;
93       for (i=0; i<m; i++) {
94 	for (j=0; j<dlens[i]; j++) {
95           if (gmataj[jj] < rstart) ld[i]++;
96 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
97 	  jj++;
98 	}
99       }
100       /* receive numerical values */
101       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
103     }
104     /* set preallocation */
105     for (i=0; i<m; i++) {
106       dlens[i] -= olens[i];
107     }
108     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
109     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
110 
111     for (i=0; i<m; i++) {
112       dlens[i] += olens[i];
113     }
114     cnt  = 0;
115     for (i=0; i<m; i++) {
116       row  = rstart + i;
117       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
118       cnt += dlens[i];
119     }
120     if (rank) {
121       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
122     }
123     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
124     ierr = PetscFree(rowners);CHKERRQ(ierr);
125     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
126     *inmat = mat;
127   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
128     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
129     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
130     mat   = *inmat;
131     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
132     if (!rank) {
133       /* send numerical values to other processes */
134       gmata = (Mat_SeqAIJ*) gmat->data;
135       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
136       gmataa = gmata->a;
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
142     } else {
143       /* receive numerical values from process 0 */
144       nz   = Ad->nz + Ao->nz;
145       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
146       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
147     }
148     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
149     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
150     ad = Ad->a;
151     ao = Ao->a;
152     if (mat->rmap->n) {
153       i  = 0;
154       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
155       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
156     }
157     for (i=1; i<mat->rmap->n; i++) {
158       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
159       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
160     }
161     i--;
162     if (mat->rmap->n) {
163       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
164     }
165     if (rank) {
166       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
167     }
168   }
169   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   CHKMEMQ;
172   PetscFunctionReturn(0);
173 }
174 
175 /*
176   Local utility routine that creates a mapping from the global column
177 number to the local number in the off-diagonal part of the local
178 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
179 a slightly higher hash table cost; without it, it is not scalable (each process
180 has an order-N integer array) but access is fast.
181 */
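/*
  Illustrative example (hypothetical values): if the off-diagonal block B references
  the global columns garray[] = {3,7,12}, then after this routine
     without PETSC_USE_CTABLE:  colmap[3] = 1, colmap[7] = 2, colmap[12] = 3
     with    PETSC_USE_CTABLE:  PetscTableFind(aij->colmap,7+1,&col) sets col = 2
  The stored value is always the local column index plus one, so callers subtract one
  after the lookup; zero (or a failed table lookup) means the column does not occur in B.
*/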
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
204 
205 
206 #define CHUNKSIZE   15
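/*
   The two macros below insert a single value into the local diagonal (A) or
   off-diagonal (B) SeqAIJ block: a short binary search narrows the column range,
   a linear scan finds an existing entry (which is added to or overwritten), and
   otherwise the entry is inserted in place, shifting the rest of the row and
   reallocating via MatSeqXAIJReallocateAIJ() when the preallocated space is full.
*/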
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
242 { \
243     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
244     lastcol2 = col;\
245     while (high2-low2 > 5) { \
246       t = (low2+high2)/2; \
247       if (rp2[t] > col) high2 = t; \
248       else             low2  = t; \
249     } \
250     for (_i=low2; _i<high2; _i++) {		\
251       if (rp2[_i] > col) break;			\
252       if (rp2[_i] == col) {			      \
253 	if (addv == ADD_VALUES) ap2[_i] += value;     \
254 	else                    ap2[_i] = value;      \
255 	goto b_noinsert;			      \
256       }						      \
257     }							      \
258     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
259     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
260     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
261     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
262     N = nrow2++ - 1; b->nz++; high2++;					\
263     /* shift up all the later entries in this row */			\
264     for (ii=N; ii>=_i; ii--) {						\
265       rp2[ii+1] = rp2[ii];						\
266       ap2[ii+1] = ap2[ii];						\
267     }									\
268     rp2[_i] = col;							\
269     ap2[_i] = value;							\
270     b_noinsert: ;								\
271     bilen[row] = nrow2;							\
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
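  /* v[] is expected to hold the entire local row in ascending global column order:
     first the off-diagonal entries left of the diagonal block (stored in B), then
     the diagonal block entries (stored in A), then the remaining off-diagonal
     entries to the right of the diagonal block (stored in B) */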
285 
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIAIJ"
304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscScalar    value;
308   PetscErrorCode ierr;
309   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
310   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
311   PetscTruth     roworiented = aij->roworiented;
312 
313   /* Some Variables required in the macro */
314   Mat            A = aij->A;
315   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
316   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
317   MatScalar      *aa = a->a;
318   PetscTruth     ignorezeroentries = a->ignorezeroentries;
319   Mat            B = aij->B;
320   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
321   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
322   MatScalar      *ba = b->a;
323 
324   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
325   PetscInt       nonew = a->nonew;
326   MatScalar      *ap1,*ap2;
327 
328   PetscFunctionBegin;
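  /* each entry (im[i],in[j]) is routed to one of three places: the diagonal block A
     when both the row and column are owned locally, the off-diagonal block B when only
     the row is local, or the stash when the row belongs to another process (stashed
     values are delivered during MatAssemblyBegin/End) */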
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (!aij->donotstash) {
394         if (roworiented) {
395           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
396         } else {
397           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         }
399       }
400     }
401   }
402   PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatGetValues_MPIAIJ"
407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
408 {
409   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
410   PetscErrorCode ierr;
411   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
412   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
413 
414   PetscFunctionBegin;
415   for (i=0; i<m; i++) {
416     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
417     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
418     if (idxm[i] >= rstart && idxm[i] < rend) {
419       row = idxm[i] - rstart;
420       for (j=0; j<n; j++) {
421         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
422         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
423         if (idxn[j] >= cstart && idxn[j] < cend){
424           col = idxn[j] - cstart;
425           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
426         } else {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
429           }
430 #if defined (PETSC_USE_CTABLE)
431           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
432           col --;
433 #else
434           col = aij->colmap[idxn[j]] - 1;
435 #endif
436           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
437           else {
438             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
439           }
440         }
441       }
442     } else {
443       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
444     }
445   }
446   PetscFunctionReturn(0);
447 }
448 
449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscErrorCode ierr;
457   PetscInt       nstash,reallocs;
458   InsertMode     addv;
459 
460   PetscFunctionBegin;
461   if (aij->donotstash) {
462     PetscFunctionReturn(0);
463   }
464 
465   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
466   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
467   if (addv == (ADD_VALUES|INSERT_VALUES)) {
468     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   }
470   mat->insertmode = addv; /* in case this processor had no cache */
471 
472   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
473   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
474   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
484   PetscErrorCode ierr;
485   PetscMPIInt    n;
486   PetscInt       i,j,rstart,ncols,flg;
487   PetscInt       *row,*col;
488   PetscTruth     other_disassembled;
489   PetscScalar    *val;
490   InsertMode     addv = mat->insertmode;
491 
492   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
493   PetscFunctionBegin;
494   if (!aij->donotstash) {
495     while (1) {
496       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
497       if (!flg) break;
498 
499       for (i=0; i<n;) {
500         /* Now identify the consecutive vals belonging to the same row */
501         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
502         if (j < n) ncols = j-i;
503         else       ncols = n-i;
504         /* Now assemble all these values with a single function call */
505         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
506         i = j;
507       }
508     }
509     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
510   }
511   a->compressedrow.use     = PETSC_FALSE;
512   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
513   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
514 
515   /* determine if any process has disassembled; if so we must
516      also disassemble ourselves, in order that we may reassemble. */
517   /*
518      if nonzero structure of submatrix B cannot change then we know that
519      no processor disassembled thus we can skip this stuff
520   */
521   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
522     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
523     if (mat->was_assembled && !other_disassembled) {
524       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
525     }
526   }
527   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
528     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
529   }
530   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
531   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
532   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
533   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
534 
535   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
536   aij->rowvalues = 0;
537 
538   /* used by MatAXPY() */
539   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
540   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
541 
542   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
543   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
550 {
551   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
556   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatZeroRows_MPIAIJ"
562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
563 {
564   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
567   PetscInt       i,*owners = A->rmap->range;
568   PetscInt       *nprocs,j,idx,nsends,row;
569   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
570   PetscInt       *rvalues,count,base,slen,*source;
571   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
572   MPI_Comm       comm = ((PetscObject)A)->comm;
573   MPI_Request    *send_waits,*recv_waits;
574   MPI_Status     recv_status,*send_status;
575 #if defined(PETSC_DEBUG)
576   PetscTruth     found = PETSC_FALSE;
577 #endif
578 
579   PetscFunctionBegin;
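  /* rows[] may list global rows owned by any process; the indices are first routed to
     their owners (message counts via PetscMaxSum(), then nonblocking sends/receives),
     after which each process zeros the rows it received from its local A and B blocks */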
580   /*  first count number of contributors to each processor */
581   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
582   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
583   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
584   j = 0;
585   for (i=0; i<N; i++) {
586     if (lastidx > (idx = rows[i])) j = 0;
587     lastidx = idx;
588     for (; j<size; j++) {
589       if (idx >= owners[j] && idx < owners[j+1]) {
590         nprocs[2*j]++;
591         nprocs[2*j+1] = 1;
592         owner[i] = j;
593 #if defined(PETSC_DEBUG)
594         found = PETSC_TRUE;
595 #endif
596         break;
597       }
598     }
599 #if defined(PETSC_DEBUG)
600     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
601     found = PETSC_FALSE;
602 #endif
603   }
604   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
605 
606   /* inform other processors of number of messages and max length*/
607   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
608 
609   /* post receives:   */
610   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
611   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
612   for (i=0; i<nrecvs; i++) {
613     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
614   }
615 
616   /* do sends:
617       1) starts[i] gives the starting index in svalues for stuff going to
618          the ith processor
619   */
620   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
621   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
622   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
623   starts[0] = 0;
624   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
625   for (i=0; i<N; i++) {
626     svalues[starts[owner[i]]++] = rows[i];
627   }
628 
629   starts[0] = 0;
630   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
631   count = 0;
632   for (i=0; i<size; i++) {
633     if (nprocs[2*i+1]) {
634       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
635     }
636   }
637   ierr = PetscFree(starts);CHKERRQ(ierr);
638 
639   base = owners[rank];
640 
641   /*  wait on receives */
642   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
643   source = lens + nrecvs;
644   count  = nrecvs; slen = 0;
645   while (count) {
646     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
647     /* unpack receives into our local space */
648     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
649     source[imdex]  = recv_status.MPI_SOURCE;
650     lens[imdex]    = n;
651     slen          += n;
652     count--;
653   }
654   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
655 
656   /* move the data into the send scatter */
657   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
658   count = 0;
659   for (i=0; i<nrecvs; i++) {
660     values = rvalues + i*nmax;
661     for (j=0; j<lens[i]; j++) {
662       lrows[count++] = values[j] - base;
663     }
664   }
665   ierr = PetscFree(rvalues);CHKERRQ(ierr);
666   ierr = PetscFree(lens);CHKERRQ(ierr);
667   ierr = PetscFree(owner);CHKERRQ(ierr);
668   ierr = PetscFree(nprocs);CHKERRQ(ierr);
669 
670   /* actually zap the local rows */
671   /*
672         Zero the required rows. If the "diagonal block" of the matrix
673      is square and the user wishes to set the diagonal we use separate
674      code so that MatSetValues() is not called for each diagonal entry, which
675      would allocate new memory, causing many mallocs and slowing things down.
676 
677        Contributed by: Matthew Knepley
678   */
679   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
680   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
681   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
682     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
683   } else if (diag != 0.0) {
684     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
685     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
686       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
687 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
688     }
689     for (i = 0; i < slen; i++) {
690       row  = lrows[i] + rstart;
691       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
692     }
693     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
695   } else {
696     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
697   }
698   ierr = PetscFree(lrows);CHKERRQ(ierr);
699 
700   /* wait on sends */
701   if (nsends) {
702     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
703     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
704     ierr = PetscFree(send_status);CHKERRQ(ierr);
705   }
706   ierr = PetscFree(send_waits);CHKERRQ(ierr);
707   ierr = PetscFree(svalues);CHKERRQ(ierr);
708 
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatMult_MPIAIJ"
714 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
715 {
716   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode ierr;
718   PetscInt       nt;
719 
720   PetscFunctionBegin;
721   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
722   if (nt != A->cmap->n) {
723     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
724   }
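  /* yy = A*xx is formed as (diagonal block A)*xx plus (off-diagonal block B)*lvec,
     where lvec holds the ghost values of xx; the scatter that fills lvec is
     overlapped with the local diagonal-block multiply */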
725   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
726   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
727   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
728   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
734 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
735 {
736   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
737   PetscErrorCode ierr;
738 
739   PetscFunctionBegin;
740   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatMultAdd_MPIAIJ"
746 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
747 {
748   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
753   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
754   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
755   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
756   PetscFunctionReturn(0);
757 }
758 
759 #undef __FUNCT__
760 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
761 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
762 {
763   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
764   PetscErrorCode ierr;
765   PetscTruth     merged;
766 
767   PetscFunctionBegin;
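  /* yy = A^T*xx: the local diagonal block contributes directly to the locally owned
     entries of yy, while the transpose of the off-diagonal block produces contributions
     destined for rows of yy owned by other processes; those are accumulated in lvec
     and added into yy through the reverse scatter */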
768   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
769   /* do nondiagonal part */
770   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
771   if (!merged) {
772     /* send it on its way */
773     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
774     /* do local part */
775     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
776     /* receive remote parts: note this assumes the values are not actually
777        added into yy until the VecScatterEnd() on the next line */
778     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
779   } else {
780     /* do local part */
781     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
782     /* send it on its way */
783     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
784     /* values actually were received in the Begin() but we need to call this nop */
785     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
786   }
787   PetscFunctionReturn(0);
788 }
789 
790 EXTERN_C_BEGIN
791 #undef __FUNCT__
792 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
793 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
794 {
795   MPI_Comm       comm;
796   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
797   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
798   IS             Me,Notme;
799   PetscErrorCode ierr;
800   PetscInt       M,N,first,last,*notme,i;
801   PetscMPIInt    size;
802 
803   PetscFunctionBegin;
804 
805   /* Easy test: symmetric diagonal block */
806   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
807   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
808   if (!*f) PetscFunctionReturn(0);
809   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
810   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
811   if (size == 1) PetscFunctionReturn(0);
812 
813   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
814   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
815   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
816   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
817   for (i=0; i<first; i++) notme[i] = i;
818   for (i=last; i<M; i++) notme[i-last+first] = i;
819   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
820   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
821   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
822   Aoff = Aoffs[0];
823   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
824   Boff = Boffs[0];
825   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
827   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
828   ierr = ISDestroy(Me);CHKERRQ(ierr);
829   ierr = ISDestroy(Notme);CHKERRQ(ierr);
830 
831   PetscFunctionReturn(0);
832 }
833 EXTERN_C_END
834 
835 #undef __FUNCT__
836 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
837 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
838 {
839   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
840   PetscErrorCode ierr;
841 
842   PetscFunctionBegin;
843   /* do nondiagonal part */
844   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
845   /* send it on its way */
846   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
847   /* do local part */
848   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
849   /* receive remote parts */
850   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
851   PetscFunctionReturn(0);
852 }
853 
854 /*
855   This only works correctly for square matrices where the subblock A->A is the
856    diagonal block
857 */
858 #undef __FUNCT__
859 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
860 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
861 {
862   PetscErrorCode ierr;
863   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
864 
865   PetscFunctionBegin;
866   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
867   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
868     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
869   }
870   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatScale_MPIAIJ"
876 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
877 {
878   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
883   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
884   PetscFunctionReturn(0);
885 }
886 
887 #undef __FUNCT__
888 #define __FUNCT__ "MatDestroy_MPIAIJ"
889 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
890 {
891   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
892   PetscErrorCode ierr;
893 
894   PetscFunctionBegin;
895 #if defined(PETSC_USE_LOG)
896   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
897 #endif
898   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
899   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
900   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
901   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
902 #if defined (PETSC_USE_CTABLE)
903   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
904 #else
905   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
906 #endif
907   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
908   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
909   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
910   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
911   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
912   ierr = PetscFree(aij);CHKERRQ(ierr);
913 
914   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatView_MPIAIJ_Binary"
927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
928 {
929   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
930   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
931   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
932   PetscErrorCode    ierr;
933   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
934   int               fd;
935   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
936   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
937   PetscScalar       *column_values;
938 
939   PetscFunctionBegin;
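  /* binary layout written by this routine: a four entry header {MAT_FILE_COOKIE, M, N,
     global number of nonzeros}, followed by every row length, then every global column
     index, then every numerical value; each process ships its piece to process 0,
     which does all of the actual writing */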
940   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
941   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
942   nz   = A->nz + B->nz;
943   if (!rank) {
944     header[0] = MAT_FILE_COOKIE;
945     header[1] = mat->rmap->N;
946     header[2] = mat->cmap->N;
947     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
948     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
949     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
950     /* get largest number of rows any processor has */
951     rlen = mat->rmap->n;
952     range = mat->rmap->range;
953     for (i=1; i<size; i++) {
954       rlen = PetscMax(rlen,range[i+1] - range[i]);
955     }
956   } else {
957     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
958     rlen = mat->rmap->n;
959   }
960 
961   /* load up the local row counts */
962   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
963   for (i=0; i<mat->rmap->n; i++) {
964     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
965   }
966 
967   /* store the row lengths to the file */
968   if (!rank) {
969     MPI_Status status;
970     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
971     for (i=1; i<size; i++) {
972       rlen = range[i+1] - range[i];
973       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
974       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
975     }
976   } else {
977     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
980 
981   /* load up the local column indices */
982   nzmax = nz; /* process 0 needs a buffer as large as the largest nz on any process */
983   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
984   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
985   cnt  = 0;
986   for (i=0; i<mat->rmap->n; i++) {
987     for (j=B->i[i]; j<B->i[i+1]; j++) {
988       if ( (col = garray[B->j[j]]) > cstart) break;
989       column_indices[cnt++] = col;
990     }
991     for (k=A->i[i]; k<A->i[i+1]; k++) {
992       column_indices[cnt++] = A->j[k] + cstart;
993     }
994     for (; j<B->i[i+1]; j++) {
995       column_indices[cnt++] = garray[B->j[j]];
996     }
997   }
998   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
999 
1000   /* store the column indices to the file */
1001   if (!rank) {
1002     MPI_Status status;
1003     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1004     for (i=1; i<size; i++) {
1005       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1006       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1007       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1008       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1009     }
1010   } else {
1011     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the local column values */
1017   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<mat->rmap->n; i++) {
1020     for (j=B->i[i]; j<B->i[i+1]; j++) {
1021       if ( garray[B->j[j]] > cstart) break;
1022       column_values[cnt++] = B->a[j];
1023     }
1024     for (k=A->i[i]; k<A->i[i+1]; k++) {
1025       column_values[cnt++] = A->a[k];
1026     }
1027     for (; j<B->i[i+1]; j++) {
1028       column_values[cnt++] = B->a[j];
1029     }
1030   }
1031   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1032 
1033   /* store the column values to the file */
1034   if (!rank) {
1035     MPI_Status status;
1036     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1037     for (i=1; i<size; i++) {
1038       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1039       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1040       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1041       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1042     }
1043   } else {
1044     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046   }
1047   ierr = PetscFree(column_values);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1054 {
1055   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1056   PetscErrorCode    ierr;
1057   PetscMPIInt       rank = aij->rank,size = aij->size;
1058   PetscTruth        isdraw,iascii,isbinary;
1059   PetscViewer       sviewer;
1060   PetscViewerFormat format;
1061 
1062   PetscFunctionBegin;
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1066   if (iascii) {
1067     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1068     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1069       MatInfo    info;
1070       PetscTruth inodes;
1071 
1072       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1073       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1074       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1075       if (!inodes) {
1076         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1077 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1078       } else {
1079         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1080 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1081       }
1082       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1083       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1084       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1085       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1086       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1087       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1088       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1089       PetscFunctionReturn(0);
1090     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1091       PetscInt   inodecount,inodelimit,*inodes;
1092       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1093       if (inodes) {
1094         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1095       } else {
1096         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1097       }
1098       PetscFunctionReturn(0);
1099     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1100       PetscFunctionReturn(0);
1101     }
1102   } else if (isbinary) {
1103     if (size == 1) {
1104       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106     } else {
1107       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1108     }
1109     PetscFunctionReturn(0);
1110   } else if (isdraw) {
1111     PetscDraw  draw;
1112     PetscTruth isnull;
1113     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1114     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1115   }
1116 
1117   if (size == 1) {
1118     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1119     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1120   } else {
1121     /* assemble the entire matrix onto first processor. */
1122     Mat         A;
1123     Mat_SeqAIJ  *Aloc;
1124     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1125     MatScalar   *a;
1126 
1127     if (mat->rmap->N > 1024) {
1128       PetscTruth flg = PETSC_FALSE;
1129 
1130       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1131       if (!flg) {
1132         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1133       }
1134     }
1135 
1136     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1137     if (!rank) {
1138       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1139     } else {
1140       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1141     }
1142     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1143     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1144     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1145     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1146 
1147     /* copy over the A part */
1148     Aloc = (Mat_SeqAIJ*)aij->A->data;
1149     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1150     row = mat->rmap->rstart;
1151     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1155     }
1156     aj = Aloc->j;
1157     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1158 
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqAIJ*)aij->B->data;
1161     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     row  = mat->rmap->rstart;
1163     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1164     ct   = cols;
1165     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1166     for (i=0; i<m; i++) {
1167       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1168       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1169     }
1170     ierr = PetscFree(ct);CHKERRQ(ierr);
1171     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     /*
1174        Everyone has to call to draw the matrix since the graphics waits are
1175        synchronized across all processors that share the PetscDraw object
1176     */
1177     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1178     if (!rank) {
1179       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1180       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1181     }
1182     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1183     ierr = MatDestroy(A);CHKERRQ(ierr);
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIAIJ"
1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1191 {
1192   PetscErrorCode ierr;
1193   PetscTruth     iascii,isdraw,issocket,isbinary;
1194 
1195   PetscFunctionBegin;
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1200   if (iascii || isdraw || isbinary || issocket) {
1201     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1202   } else {
1203     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1204   }
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatRelax_MPIAIJ"
1210 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1211 {
1212   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1213   PetscErrorCode ierr;
1214   Vec            bb1 = 0;
1215   PetscTruth     hasop;
1216 
1217   PetscFunctionBegin;
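  /* only processor-local relaxation is provided: each outer iteration scatters the
     current xx into lvec, forms bb1 = bb - B*lvec, and applies the requested sweep to
     the local diagonal block A (the Eisenstat variant is handled separately below);
     true parallel SOR is rejected with an error at the end */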
1218   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1219     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1220   }
1221 
1222   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1223     if (flag & SOR_ZERO_INITIAL_GUESS) {
1224       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1225       its--;
1226     }
1227 
1228     while (its--) {
1229       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1231 
1232       /* update rhs: bb1 = bb - B*x */
1233       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1234       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1235 
1236       /* local sweep */
1237       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1238     }
1239   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1240     if (flag & SOR_ZERO_INITIAL_GUESS) {
1241       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1242       its--;
1243     }
1244     while (its--) {
1245       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1247 
1248       /* update rhs: bb1 = bb - B*x */
1249       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1250       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1251 
1252       /* local sweep */
1253       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1254     }
1255   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1256     if (flag & SOR_ZERO_INITIAL_GUESS) {
1257       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1258       its--;
1259     }
1260     while (its--) {
1261       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1262       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1263 
1264       /* update rhs: bb1 = bb - B*x */
1265       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1266       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1267 
1268       /* local sweep */
1269       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1270     }
1271   }  else if (flag & SOR_EISENSTAT) {
1272     Vec         xx1;
1273 
1274     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1275     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1276 
1277     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1278     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1279     if (!mat->diag) {
1280       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1281       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1282     }
1283     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1284     if (hasop) {
1285       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1286     } else {
1287       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1288     }
1289     ierr = VecAYPX(bb1,-1.0,bb);CHKERRQ(ierr);
1290     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1291 
1292     /* local sweep */
1293     ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1294     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1295     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1296   } else {
1297     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1298   }
1299 
1300   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatPermute_MPIAIJ"
1306 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1307 {
1308   MPI_Comm       comm,pcomm;
1309   PetscInt       first,local_size,nrows;
1310   const PetscInt *rows;
1311   PetscMPIInt    size;
1312   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1317   /* make a collective version of 'rowp' */
1318   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1319   if (pcomm==comm) {
1320     crowp = rowp;
1321   } else {
1322     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1323     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1324     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1325     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1326   }
1327   /* collect the global row permutation and invert it */
1328   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1329   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1330   if (pcomm!=comm) {
1331     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1332   }
1333   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1334   /* get the local target indices */
1335   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1336   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1337   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1338   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1339   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1340   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1341   /* the column permutation is so much easier;
1342      make a local version of 'colp' and invert it */
1343   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1344   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1345   if (size==1) {
1346     lcolp = colp;
1347   } else {
1348     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1349     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1350     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1351   }
1352   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1353   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1354   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1355   if (size>1) {
1356     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1357     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1358   }
1359   /* now we just get the submatrix */
1360   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1361   /* clean up */
1362   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1363   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
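/*
   A minimal usage sketch for the routine above (rowp/colp are assumed to already hold the
   row and column permutations as index sets; hypothetical names, not taken from this file):

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(Aperm);CHKERRQ(ierr);
*/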
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1369 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1370 {
1371   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1372   Mat            A = mat->A,B = mat->B;
1373   PetscErrorCode ierr;
1374   PetscReal      isend[5],irecv[5];
1375 
1376   PetscFunctionBegin;
1377   info->block_size     = 1.0;
1378   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1379   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1380   isend[3] = info->memory;  isend[4] = info->mallocs;
1381   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1382   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1383   isend[3] += info->memory;  isend[4] += info->mallocs;
1384   if (flag == MAT_LOCAL) {
1385     info->nz_used      = isend[0];
1386     info->nz_allocated = isend[1];
1387     info->nz_unneeded  = isend[2];
1388     info->memory       = isend[3];
1389     info->mallocs      = isend[4];
1390   } else if (flag == MAT_GLOBAL_MAX) {
1391     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1392     info->nz_used      = irecv[0];
1393     info->nz_allocated = irecv[1];
1394     info->nz_unneeded  = irecv[2];
1395     info->memory       = irecv[3];
1396     info->mallocs      = irecv[4];
1397   } else if (flag == MAT_GLOBAL_SUM) {
1398     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1399     info->nz_used      = irecv[0];
1400     info->nz_allocated = irecv[1];
1401     info->nz_unneeded  = irecv[2];
1402     info->memory       = irecv[3];
1403     info->mallocs      = irecv[4];
1404   }
1405   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1406   info->fill_ratio_needed = 0;
1407   info->factor_mallocs    = 0;
1408 
1409   PetscFunctionReturn(0);
1410 }
1411 
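/*
   A minimal usage sketch for the routine above (hedged; the MatInfo fields are the ones
   filled in by the code):

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g\n",
                        info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/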
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatSetOption_MPIAIJ"
1414 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1415 {
1416   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   switch (op) {
1421   case MAT_NEW_NONZERO_LOCATIONS:
1422   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1423   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1424   case MAT_KEEP_NONZERO_PATTERN:
1425   case MAT_NEW_NONZERO_LOCATION_ERR:
1426   case MAT_USE_INODES:
1427   case MAT_IGNORE_ZERO_ENTRIES:
1428     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1429     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1430     break;
1431   case MAT_ROW_ORIENTED:
1432     a->roworiented = flg;
1433     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1434     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1435     break;
1436   case MAT_NEW_DIAGONALS:
1437     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1438     break;
1439   case MAT_IGNORE_OFF_PROC_ENTRIES:
1440     a->donotstash = PETSC_TRUE;
1441     break;
1442   case MAT_SYMMETRIC:
1443     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1444     break;
1445   case MAT_STRUCTURALLY_SYMMETRIC:
1446   case MAT_HERMITIAN:
1447   case MAT_SYMMETRY_ETERNAL:
1448     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1449     break;
1450   default:
1451     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1452   }
1453   PetscFunctionReturn(0);
1454 }
1455 
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatGetRow_MPIAIJ"
1458 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1459 {
1460   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1461   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1462   PetscErrorCode ierr;
1463   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1464   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1465   PetscInt       *cmap,*idx_p;
1466 
1467   PetscFunctionBegin;
1468   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() already active; call MatRestoreRow() before getting another row");
1469   mat->getrowactive = PETSC_TRUE;
1470 
1471   if (!mat->rowvalues && (idx || v)) {
1472     /*
1473         allocate enough space to hold information from the longest row.
1474     */
1475     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1476     PetscInt     max = 1,tmp;
1477     for (i=0; i<matin->rmap->n; i++) {
1478       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1479       if (max < tmp) { max = tmp; }
1480     }
1481     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1482     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1483   }
1484 
1485   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1486   lrow = row - rstart;
1487 
1488   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1489   if (!v)   {pvA = 0; pvB = 0;}
1490   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1491   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1492   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1493   nztot = nzA + nzB;
1494 
1495   cmap  = mat->garray;
1496   if (v  || idx) {
1497     if (nztot) {
1498       /* Sort by increasing column numbers, assuming A and B already sorted */
1499       PetscInt imark = -1;
1500       if (v) {
1501         *v = v_p = mat->rowvalues;
1502         for (i=0; i<nzB; i++) {
1503           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1504           else break;
1505         }
1506         imark = i;
1507         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1508         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1509       }
1510       if (idx) {
1511         *idx = idx_p = mat->rowindices;
1512         if (imark > -1) {
1513           for (i=0; i<imark; i++) {
1514             idx_p[i] = cmap[cworkB[i]];
1515           }
1516         } else {
1517           for (i=0; i<nzB; i++) {
1518             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1519             else break;
1520           }
1521           imark = i;
1522         }
1523         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1524         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1525       }
1526     } else {
1527       if (idx) *idx = 0;
1528       if (v)   *v   = 0;
1529     }
1530   }
1531   *nz = nztot;
1532   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1533   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1539 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1540 {
1541   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1542 
1543   PetscFunctionBegin;
1544   if (!aij->getrowactive) {
1545     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1546   }
1547   aij->getrowactive = PETSC_FALSE;
1548   PetscFunctionReturn(0);
1549 }
1550 
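/*
   A minimal usage sketch of the get/restore pair above; only locally owned rows may be
   requested (a sketch, assuming the const-qualified MatGetRow() signature of this release):

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* ... use cols[0..ncols-1] and vals[0..ncols-1] ... */
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/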
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatNorm_MPIAIJ"
1553 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1554 {
1555   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1556   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1557   PetscErrorCode ierr;
1558   PetscInt       i,j,cstart = mat->cmap->rstart;
1559   PetscReal      sum = 0.0;
1560   MatScalar      *v;
1561 
1562   PetscFunctionBegin;
1563   if (aij->size == 1) {
1564     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1565   } else {
1566     if (type == NORM_FROBENIUS) {
1567       v = amat->a;
1568       for (i=0; i<amat->nz; i++) {
1569 #if defined(PETSC_USE_COMPLEX)
1570         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1571 #else
1572         sum += (*v)*(*v); v++;
1573 #endif
1574       }
1575       v = bmat->a;
1576       for (i=0; i<bmat->nz; i++) {
1577 #if defined(PETSC_USE_COMPLEX)
1578         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1579 #else
1580         sum += (*v)*(*v); v++;
1581 #endif
1582       }
1583       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1584       *norm = sqrt(*norm);
1585     } else if (type == NORM_1) { /* max column norm */
1586       PetscReal *tmp,*tmp2;
1587       PetscInt  *jj,*garray = aij->garray;
1588       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1589       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1590       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1591       *norm = 0.0;
1592       v = amat->a; jj = amat->j;
1593       for (j=0; j<amat->nz; j++) {
1594         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1595       }
1596       v = bmat->a; jj = bmat->j;
1597       for (j=0; j<bmat->nz; j++) {
1598         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1599       }
1600       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1601       for (j=0; j<mat->cmap->N; j++) {
1602         if (tmp2[j] > *norm) *norm = tmp2[j];
1603       }
1604       ierr = PetscFree(tmp);CHKERRQ(ierr);
1605       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1606     } else if (type == NORM_INFINITY) { /* max row norm */
1607       PetscReal ntemp = 0.0;
1608       for (j=0; j<aij->A->rmap->n; j++) {
1609         v = amat->a + amat->i[j];
1610         sum = 0.0;
1611         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1612           sum += PetscAbsScalar(*v); v++;
1613         }
1614         v = bmat->a + bmat->i[j];
1615         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1616           sum += PetscAbsScalar(*v); v++;
1617         }
1618         if (sum > ntemp) ntemp = sum;
1619       }
1620       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1621     } else {
1622       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1623     }
1624   }
1625   PetscFunctionReturn(0);
1626 }
1627 
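/*
   Note (editorial): with the local matrix split into the diagonal block A and off-diagonal
   block B, the norms above are assembled as
     NORM_FROBENIUS: sqrt( MPI_SUM over processes of sum |a_ij|^2 taken over the local A and B parts )
     NORM_1:         column sums of |a_ij| accumulated in a length-N work array, MPI_SUMed, then the maximum entry
     NORM_INFINITY:  row sums of |a_ij| over the local rows, local maximum, then MPI_MAX across processes
*/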
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatTranspose_MPIAIJ"
1630 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1631 {
1632   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1633   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1634   PetscErrorCode ierr;
1635   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1636   PetscInt       cstart=A->cmap->rstart,ncol;
1637   Mat            B;
1638   MatScalar      *array;
1639 
1640   PetscFunctionBegin;
1641   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1642 
1643   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1644   ai = Aloc->i; aj = Aloc->j;
1645   bi = Bloc->i; bj = Bloc->j;
1646   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1647     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1648     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1649     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1650     for (i=0; i<ai[ma]; i++){
1651       d_nnz[aj[i]] ++;
1652       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1653     }
1654 
1655     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1656     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1657     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1658     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1659     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1660   } else {
1661     B = *matout;
1662   }
1663 
1664   /* copy over the A part */
1665   array = Aloc->a;
1666   row = A->rmap->rstart;
1667   for (i=0; i<ma; i++) {
1668     ncol = ai[i+1]-ai[i];
1669     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1670     row++; array += ncol; aj += ncol;
1671   }
1672   aj = Aloc->j;
1673   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1674 
1675   /* copy over the B part */
1676   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1677   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1678   array = Bloc->a;
1679   row = A->rmap->rstart;
1680   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1681   cols_tmp = cols;
1682   for (i=0; i<mb; i++) {
1683     ncol = bi[i+1]-bi[i];
1684     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1685     row++; array += ncol; cols_tmp += ncol;
1686   }
1687   ierr = PetscFree(cols);CHKERRQ(ierr);
1688 
1689   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1692     *matout = B;
1693   } else {
1694     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1695   }
1696   PetscFunctionReturn(0);
1697 }
1698 
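/*
   A minimal usage sketch for the routine above (out-of-place transpose; hypothetical matrix At):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(At);CHKERRQ(ierr);
*/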
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1701 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1702 {
1703   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1704   Mat            a = aij->A,b = aij->B;
1705   PetscErrorCode ierr;
1706   PetscInt       s1,s2,s3;
1707 
1708   PetscFunctionBegin;
1709   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1710   if (rr) {
1711     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1712     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1713     /* Overlap communication with computation. */
1714     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1715   }
1716   if (ll) {
1717     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1718     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1719     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1720   }
1721   /* scale  the diagonal block */
1722   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1723 
1724   if (rr) {
1725     /* Do a scatter end and then right scale the off-diagonal block */
1726     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1727     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1728   }
1729 
1730   PetscFunctionReturn(0);
1731 }
1732 
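/*
   A minimal usage sketch for the routine above: mat is overwritten with diag(ll)*mat*diag(rr).
   (Hedged sketch; ll conforms to the row layout, rr to the column layout.)

     Vec l,r;
     ierr = MatGetVecs(mat,&r,&l);CHKERRQ(ierr);  /* r: right/column layout, l: left/row layout */
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);
*/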
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1735 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1736 {
1737   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1738   PetscErrorCode ierr;
1739 
1740   PetscFunctionBegin;
1741   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1742   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1747 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1748 {
1749   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1750   PetscErrorCode ierr;
1751 
1752   PetscFunctionBegin;
1753   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1754   PetscFunctionReturn(0);
1755 }
1756 
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatEqual_MPIAIJ"
1759 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1760 {
1761   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1762   Mat            a,b,c,d;
1763   PetscTruth     flg;
1764   PetscErrorCode ierr;
1765 
1766   PetscFunctionBegin;
1767   a = matA->A; b = matA->B;
1768   c = matB->A; d = matB->B;
1769 
1770   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1771   if (flg) {
1772     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1773   }
1774   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatCopy_MPIAIJ"
1780 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1781 {
1782   PetscErrorCode ierr;
1783   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1784   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1785 
1786   PetscFunctionBegin;
1787   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1788   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1789     /* because of the column compression in the off-processor part of the matrix a->B,
1790        the number of columns in a->B and b->B may be different, hence we cannot call
1791        the MatCopy() directly on the two parts. If need be, we can provide a more
1792        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1793        then copying the submatrices */
1794     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1795   } else {
1796     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1797     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1798   }
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1804 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1805 {
1806   PetscErrorCode ierr;
1807 
1808   PetscFunctionBegin;
1809   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 #include "petscblaslapack.h"
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatAXPY_MPIAIJ"
1816 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1817 {
1818   PetscErrorCode ierr;
1819   PetscInt       i;
1820   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1821   PetscBLASInt   bnz,one=1;
1822   Mat_SeqAIJ     *x,*y;
1823 
1824   PetscFunctionBegin;
1825   if (str == SAME_NONZERO_PATTERN) {
1826     PetscScalar alpha = a;
1827     x = (Mat_SeqAIJ *)xx->A->data;
1828     y = (Mat_SeqAIJ *)yy->A->data;
1829     bnz = PetscBLASIntCast(x->nz);
1830     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1831     x = (Mat_SeqAIJ *)xx->B->data;
1832     y = (Mat_SeqAIJ *)yy->B->data;
1833     bnz = PetscBLASIntCast(x->nz);
1834     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1835   } else if (str == SUBSET_NONZERO_PATTERN) {
1836     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1837 
1838     x = (Mat_SeqAIJ *)xx->B->data;
1839     y = (Mat_SeqAIJ *)yy->B->data;
1840     if (y->xtoy && y->XtoY != xx->B) {
1841       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1842       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1843     }
1844     if (!y->xtoy) { /* get xtoy */
1845       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1846       y->XtoY = xx->B;
1847       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1848     }
1849     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1850   } else {
1851     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1852   }
1853   PetscFunctionReturn(0);
1854 }
1855 
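/*
   Note (editorial): the three branches above compute Y = Y + a*X as follows:
     SAME_NONZERO_PATTERN   - a direct BLAS axpy on the value arrays of the A and B parts;
     SUBSET_NONZERO_PATTERN - the diagonal part is handled by MatAXPY_SeqAIJ(), while the
                              off-diagonal part reuses a cached xtoy index map from X's nonzeros into Y's;
     otherwise              - MatAXPY_Basic() provides a generic fallback.
   A minimal usage sketch:
     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/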
1856 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1857 
1858 #undef __FUNCT__
1859 #define __FUNCT__ "MatConjugate_MPIAIJ"
1860 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1861 {
1862 #if defined(PETSC_USE_COMPLEX)
1863   PetscErrorCode ierr;
1864   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1865 
1866   PetscFunctionBegin;
1867   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1868   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1869 #else
1870   PetscFunctionBegin;
1871 #endif
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 #undef __FUNCT__
1876 #define __FUNCT__ "MatRealPart_MPIAIJ"
1877 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1878 {
1879   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1880   PetscErrorCode ierr;
1881 
1882   PetscFunctionBegin;
1883   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1884   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 #undef __FUNCT__
1889 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1890 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1891 {
1892   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1893   PetscErrorCode ierr;
1894 
1895   PetscFunctionBegin;
1896   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1897   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 #ifdef PETSC_HAVE_PBGL
1902 
1903 #include <boost/parallel/mpi/bsp_process_group.hpp>
1904 #include <boost/graph/distributed/ilu_default_graph.hpp>
1905 #include <boost/graph/distributed/ilu_0_block.hpp>
1906 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1907 #include <boost/graph/distributed/petsc/interface.hpp>
1908 #include <boost/multi_array.hpp>
1909 #include <boost/parallel/distributed_property_map.hpp>
1910 
1911 #undef __FUNCT__
1912 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1913 /*
1914   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1915 */
1916 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1917 {
1918   namespace petsc = boost::distributed::petsc;
1919 
1920   namespace graph_dist = boost::graph::distributed;
1921   using boost::graph::distributed::ilu_default::process_group_type;
1922   using boost::graph::ilu_permuted;
1923 
1924   PetscTruth      row_identity, col_identity;
1925   PetscContainer  c;
1926   PetscInt        m, n, M, N;
1927   PetscErrorCode  ierr;
1928 
1929   PetscFunctionBegin;
1930   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1931   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1932   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1933   if (!row_identity || !col_identity) {
1934     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1935   }
1936 
1937   process_group_type pg;
1938   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1939   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1940   lgraph_type&   level_graph = *lgraph_p;
1941   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1942 
1943   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1944   ilu_permuted(level_graph);
1945 
1946   /* put together the new matrix */
1947   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1948   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1949   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1950   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1951   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1952   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1953   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1954 
1955   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1956   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1957   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1963 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1964 {
1965   PetscFunctionBegin;
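  /* Intentionally a no-op: for the PBGL path the factors appear to be computed already by
     ilu_permuted() inside MatILUFactorSymbolic_MPIAIJ() above (editorial note). */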
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 #undef __FUNCT__
1970 #define __FUNCT__ "MatSolve_MPIAIJ"
1971 /*
1972   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1973 */
1974 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1975 {
1976   namespace graph_dist = boost::graph::distributed;
1977 
1978   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1979   lgraph_type*   lgraph_p;
1980   PetscContainer c;
1981   PetscErrorCode ierr;
1982 
1983   PetscFunctionBegin;
1984   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1985   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1986   ierr = VecCopy(b, x);CHKERRQ(ierr);
1987 
1988   PetscScalar* array_x;
1989   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1990   PetscInt sx;
1991   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1992 
1993   PetscScalar* array_b;
1994   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1995   PetscInt sb;
1996   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1997 
1998   lgraph_type&   level_graph = *lgraph_p;
1999   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2000 
2001   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2002   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2003                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2004 
2005   typedef boost::iterator_property_map<array_ref_type::iterator,
2006                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2007   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2008                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2009 
2010   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2011 
2012   PetscFunctionReturn(0);
2013 }
2014 #endif
2015 
2016 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2017   PetscInt       nzlocal,nsends,nrecvs;
2018   PetscMPIInt    *send_rank;
2019   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
2020   PetscScalar    *sbuf_a,**rbuf_a;
2021   PetscErrorCode (*MatDestroy)(Mat);
2022 } Mat_Redundant;
2023 
2024 #undef __FUNCT__
2025 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2026 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2027 {
2028   PetscErrorCode       ierr;
2029   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2030   PetscInt             i;
2031 
2032   PetscFunctionBegin;
2033   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2034   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2035   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2036   for (i=0; i<redund->nrecvs; i++){
2037     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2038     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2039   }
2040   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2041   ierr = PetscFree(redund);CHKERRQ(ierr);
2042   PetscFunctionReturn(0);
2043 }
2044 
2045 #undef __FUNCT__
2046 #define __FUNCT__ "MatDestroy_MatRedundant"
2047 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2048 {
2049   PetscErrorCode  ierr;
2050   PetscContainer  container;
2051   Mat_Redundant   *redund=PETSC_NULL;
2052 
2053   PetscFunctionBegin;
2054   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2055   if (container) {
2056     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2057   } else {
2058     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2059   }
2060   A->ops->destroy = redund->MatDestroy;
2061   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2062   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2063   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 #undef __FUNCT__
2068 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2069 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2070 {
2071   PetscMPIInt    rank,size;
2072   MPI_Comm       comm=((PetscObject)mat)->comm;
2073   PetscErrorCode ierr;
2074   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2075   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2076   PetscInt       *rowrange=mat->rmap->range;
2077   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2078   Mat            A=aij->A,B=aij->B,C=*matredundant;
2079   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2080   PetscScalar    *sbuf_a;
2081   PetscInt       nzlocal=a->nz+b->nz;
2082   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2083   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2084   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2085   MatScalar      *aworkA,*aworkB;
2086   PetscScalar    *vals;
2087   PetscMPIInt    tag1,tag2,tag3,imdex;
2088   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2089                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2090   MPI_Status     recv_status,*send_status;
2091   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2092   PetscInt       **rbuf_j=PETSC_NULL;
2093   PetscScalar    **rbuf_a=PETSC_NULL;
2094   Mat_Redundant  *redund=PETSC_NULL;
2095   PetscContainer container;
2096 
2097   PetscFunctionBegin;
2098   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2099   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2100 
2101   if (reuse == MAT_REUSE_MATRIX) {
2102     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2103     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2104     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2105     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2106     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2107     if (container) {
2108       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2109     } else {
2110       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2111     }
2112     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2113 
2114     nsends    = redund->nsends;
2115     nrecvs    = redund->nrecvs;
2116     send_rank = redund->send_rank; recv_rank = send_rank + size;
2117     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2118     sbuf_j    = redund->sbuf_j;
2119     sbuf_a    = redund->sbuf_a;
2120     rbuf_j    = redund->rbuf_j;
2121     rbuf_a    = redund->rbuf_a;
2122   }
2123 
2124   if (reuse == MAT_INITIAL_MATRIX){
2125     PetscMPIInt  subrank,subsize;
2126     PetscInt     nleftover,np_subcomm;
2127     /* get the destination processors' id send_rank, nsends and nrecvs */
2128     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2129     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2130     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2131     recv_rank = send_rank + size;
2132     np_subcomm = size/nsubcomm;
2133     nleftover  = size - nsubcomm*np_subcomm;
2134     nsends = 0; nrecvs = 0;
2135     for (i=0; i<size; i++){ /* i = rank */
2136       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2137         send_rank[nsends] = i; nsends++;
2138         recv_rank[nrecvs++] = i;
2139       }
2140     }
2141     if (rank >= size - nleftover){/* this proc is a leftover processor */
2142       i = size-nleftover-1;
2143       j = 0;
2144       while (j < nsubcomm - nleftover){
2145         send_rank[nsends++] = i;
2146         i--; j++;
2147       }
2148     }
2149 
2150     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2151       for (i=0; i<nleftover; i++){
2152         recv_rank[nrecvs++] = size-nleftover+i;
2153       }
2154     }
2155 
2156     /* allocate sbuf_j, sbuf_a */
2157     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2158     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2159     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2160   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2161 
2162   /* copy mat's local entries into the buffers */
2163   if (reuse == MAT_INITIAL_MATRIX){
2164     rownz_max = 0;
2165     rptr = sbuf_j;
2166     cols = sbuf_j + rend-rstart + 1;
2167     vals = sbuf_a;
2168     rptr[0] = 0;
2169     for (i=0; i<rend-rstart; i++){
2170       row = i + rstart;
2171       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2172       ncols  = nzA + nzB;
2173       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2174       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2175       /* load the column indices for this row into cols */
2176       lwrite = 0;
2177       for (l=0; l<nzB; l++) {
2178         if ((ctmp = bmap[cworkB[l]]) < cstart){
2179           vals[lwrite]   = aworkB[l];
2180           cols[lwrite++] = ctmp;
2181         }
2182       }
2183       for (l=0; l<nzA; l++){
2184         vals[lwrite]   = aworkA[l];
2185         cols[lwrite++] = cstart + cworkA[l];
2186       }
2187       for (l=0; l<nzB; l++) {
2188         if ((ctmp = bmap[cworkB[l]]) >= cend){
2189           vals[lwrite]   = aworkB[l];
2190           cols[lwrite++] = ctmp;
2191         }
2192       }
2193       vals += ncols;
2194       cols += ncols;
2195       rptr[i+1] = rptr[i] + ncols;
2196       if (rownz_max < ncols) rownz_max = ncols;
2197     }
2198     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2199   } else { /* only copy matrix values into sbuf_a */
2200     rptr = sbuf_j;
2201     vals = sbuf_a;
2202     rptr[0] = 0;
2203     for (i=0; i<rend-rstart; i++){
2204       row = i + rstart;
2205       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2206       ncols  = nzA + nzB;
2207       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2208       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2209       lwrite = 0;
2210       for (l=0; l<nzB; l++) {
2211         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2212       }
2213       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2214       for (l=0; l<nzB; l++) {
2215         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2216       }
2217       vals += ncols;
2218       rptr[i+1] = rptr[i] + ncols;
2219     }
2220   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2221 
2222   /* send nzlocal to others, and recv other's nzlocal */
2223   /*--------------------------------------------------*/
2224   if (reuse == MAT_INITIAL_MATRIX){
2225     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2226     s_waits2 = s_waits3 + nsends;
2227     s_waits1 = s_waits2 + nsends;
2228     r_waits1 = s_waits1 + nsends;
2229     r_waits2 = r_waits1 + nrecvs;
2230     r_waits3 = r_waits2 + nrecvs;
2231   } else {
2232     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2233     r_waits3 = s_waits3 + nsends;
2234   }
2235 
2236   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2237   if (reuse == MAT_INITIAL_MATRIX){
2238     /* get new tags to keep the communication clean */
2239     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2240     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2241     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2242     rbuf_nz = sbuf_nz + nsends;
2243 
2244     /* post receives of other's nzlocal */
2245     for (i=0; i<nrecvs; i++){
2246       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2247     }
2248     /* send nzlocal to others */
2249     for (i=0; i<nsends; i++){
2250       sbuf_nz[i] = nzlocal;
2251       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2252     }
2253     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2254     count = nrecvs;
2255     while (count) {
2256       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2257       recv_rank[imdex] = recv_status.MPI_SOURCE;
2258       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2259       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2260 
2261       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2262       rbuf_nz[imdex] += i + 2;
2263       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2264       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2265       count--;
2266     }
2267     /* wait on sends of nzlocal */
2268     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2269     /* send mat->i,j to others, and recv from other's */
2270     /*------------------------------------------------*/
2271     for (i=0; i<nsends; i++){
2272       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2273       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2274     }
2275     /* wait on receives of mat->i,j */
2276     /*------------------------------*/
2277     count = nrecvs;
2278     while (count) {
2279       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2280       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2281       count--;
2282     }
2283     /* wait on sends of mat->i,j */
2284     /*---------------------------*/
2285     if (nsends) {
2286       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2287     }
2288   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2289 
2290   /* post receives, send and receive mat->a */
2291   /*----------------------------------------*/
2292   for (imdex=0; imdex<nrecvs; imdex++) {
2293     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2294   }
2295   for (i=0; i<nsends; i++){
2296     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2297   }
2298   count = nrecvs;
2299   while (count) {
2300     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2301     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2302     count--;
2303   }
2304   if (nsends) {
2305     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2306   }
2307 
2308   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2309 
2310   /* create redundant matrix */
2311   /*-------------------------*/
2312   if (reuse == MAT_INITIAL_MATRIX){
2313     /* compute rownz_max for preallocation */
2314     for (imdex=0; imdex<nrecvs; imdex++){
2315       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2316       rptr = rbuf_j[imdex];
2317       for (i=0; i<j; i++){
2318         ncols = rptr[i+1] - rptr[i];
2319         if (rownz_max < ncols) rownz_max = ncols;
2320       }
2321     }
2322 
2323     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2324     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2325     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2326     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2327     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2328   } else {
2329     C = *matredundant;
2330   }
2331 
2332   /* insert local matrix entries */
2333   rptr = sbuf_j;
2334   cols = sbuf_j + rend-rstart + 1;
2335   vals = sbuf_a;
2336   for (i=0; i<rend-rstart; i++){
2337     row   = i + rstart;
2338     ncols = rptr[i+1] - rptr[i];
2339     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2340     vals += ncols;
2341     cols += ncols;
2342   }
2343   /* insert received matrix entries */
2344   for (imdex=0; imdex<nrecvs; imdex++){
2345     rstart = rowrange[recv_rank[imdex]];
2346     rend   = rowrange[recv_rank[imdex]+1];
2347     rptr = rbuf_j[imdex];
2348     cols = rbuf_j[imdex] + rend-rstart + 1;
2349     vals = rbuf_a[imdex];
2350     for (i=0; i<rend-rstart; i++){
2351       row   = i + rstart;
2352       ncols = rptr[i+1] - rptr[i];
2353       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2354       vals += ncols;
2355       cols += ncols;
2356     }
2357   }
2358   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2359   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2361   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2362   if (reuse == MAT_INITIAL_MATRIX){
2363     PetscContainer container;
2364     *matredundant = C;
2365     /* create a supporting struct and attach it to C for reuse */
2366     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2367     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2368     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2369     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2370     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2371 
2372     redund->nzlocal = nzlocal;
2373     redund->nsends  = nsends;
2374     redund->nrecvs  = nrecvs;
2375     redund->send_rank = send_rank;
2376     redund->sbuf_nz = sbuf_nz;
2377     redund->sbuf_j  = sbuf_j;
2378     redund->sbuf_a  = sbuf_a;
2379     redund->rbuf_j  = rbuf_j;
2380     redund->rbuf_a  = rbuf_a;
2381 
2382     redund->MatDestroy = C->ops->destroy;
2383     C->ops->destroy    = MatDestroy_MatRedundant;
2384   }
2385   PetscFunctionReturn(0);
2386 }
2387 
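/*
   A minimal usage sketch for the routine above (hypothetical subcommunicator 'subcomm' holding
   one of 'nsubcomm' redundant copies; mlocal_sub is the local size wanted on the subcommunicator):

     Mat Cred;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
     ...
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);
*/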
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2390 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2391 {
2392   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2393   PetscErrorCode ierr;
2394   PetscInt       i,*idxb = 0;
2395   PetscScalar    *va,*vb;
2396   Vec            vtmp;
2397 
2398   PetscFunctionBegin;
2399   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2400   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2401   if (idx) {
2402     for (i=0; i<A->rmap->n; i++) {
2403       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2404     }
2405   }
2406 
2407   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2408   if (idx) {
2409     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2410   }
2411   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2412   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2413 
2414   for (i=0; i<A->rmap->n; i++){
2415     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2416       va[i] = vb[i];
2417       if (idx) idx[i] = a->garray[idxb[i]];
2418     }
2419   }
2420 
2421   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2422   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2423   if (idxb) {
2424     ierr = PetscFree(idxb);CHKERRQ(ierr);
2425   }
2426   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
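/*
   Note (editorial): the row max/min routines above and below merge two sequential results,
   one from the diagonal block A and one from the off-diagonal block B; a winning column index
   from B is translated to a global index through a->garray. A minimal usage sketch:

     Vec rowmax;
     ierr = MatGetVecs(A,PETSC_NULL,&rowmax);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,PETSC_NULL);CHKERRQ(ierr);
*/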
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2432 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2433 {
2434   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2435   PetscErrorCode ierr;
2436   PetscInt       i,*idxb = 0;
2437   PetscScalar    *va,*vb;
2438   Vec            vtmp;
2439 
2440   PetscFunctionBegin;
2441   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2442   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2443   if (idx) {
2444     for (i=0; i<A->rmap->n; i++) {
2445       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2446     }
2447   }
2448 
2449   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2450   if (idx) {
2451     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2452   }
2453   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2454   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2455 
2456   for (i=0; i<A->rmap->n; i++){
2457     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2458       va[i] = vb[i];
2459       if (idx) idx[i] = a->garray[idxb[i]];
2460     }
2461   }
2462 
2463   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2464   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2465   if (idxb) {
2466     ierr = PetscFree(idxb);CHKERRQ(ierr);
2467   }
2468   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2474 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2475 {
2476   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2477   PetscInt       n      = A->rmap->n;
2478   PetscInt       cstart = A->cmap->rstart;
2479   PetscInt      *cmap   = mat->garray;
2480   PetscInt      *diagIdx, *offdiagIdx;
2481   Vec            diagV, offdiagV;
2482   PetscScalar   *a, *diagA, *offdiagA;
2483   PetscInt       r;
2484   PetscErrorCode ierr;
2485 
2486   PetscFunctionBegin;
2487   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2488   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2489   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2490   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2491   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2492   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2493   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2494   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2495   for(r = 0; r < n; ++r) {
2496     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2497       a[r]   = diagA[r];
2498       idx[r] = cstart + diagIdx[r];
2499     } else {
2500       a[r]   = offdiagA[r];
2501       idx[r] = cmap[offdiagIdx[r]];
2502     }
2503   }
2504   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2505   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2506   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2507   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2508   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2509   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2510   PetscFunctionReturn(0);
2511 }
2512 
2513 #undef __FUNCT__
2514 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2515 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2516 {
2517   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2518   PetscInt       n      = A->rmap->n;
2519   PetscInt       cstart = A->cmap->rstart;
2520   PetscInt      *cmap   = mat->garray;
2521   PetscInt      *diagIdx, *offdiagIdx;
2522   Vec            diagV, offdiagV;
2523   PetscScalar   *a, *diagA, *offdiagA;
2524   PetscInt       r;
2525   PetscErrorCode ierr;
2526 
2527   PetscFunctionBegin;
2528   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2529   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2530   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2531   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2532   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2533   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2534   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2535   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2536   for(r = 0; r < n; ++r) {
2537     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2538       a[r]   = diagA[r];
2539       idx[r] = cstart + diagIdx[r];
2540     } else {
2541       a[r]   = offdiagA[r];
2542       idx[r] = cmap[offdiagIdx[r]];
2543     }
2544   }
2545   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2546   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2547   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2548   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2549   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2550   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 #undef __FUNCT__
2555 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2556 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2557 {
2558   PetscErrorCode ierr;
2559   Mat            *dummy;
2560 
2561   PetscFunctionBegin;
2562   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2563   *newmat = *dummy;
2564   ierr = PetscFree(dummy);CHKERRQ(ierr);
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2569 /* -------------------------------------------------------------------*/
2570 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2571        MatGetRow_MPIAIJ,
2572        MatRestoreRow_MPIAIJ,
2573        MatMult_MPIAIJ,
2574 /* 4*/ MatMultAdd_MPIAIJ,
2575        MatMultTranspose_MPIAIJ,
2576        MatMultTransposeAdd_MPIAIJ,
2577 #ifdef PETSC_HAVE_PBGL
2578        MatSolve_MPIAIJ,
2579 #else
2580        0,
2581 #endif
2582        0,
2583        0,
2584 /*10*/ 0,
2585        0,
2586        0,
2587        MatRelax_MPIAIJ,
2588        MatTranspose_MPIAIJ,
2589 /*15*/ MatGetInfo_MPIAIJ,
2590        MatEqual_MPIAIJ,
2591        MatGetDiagonal_MPIAIJ,
2592        MatDiagonalScale_MPIAIJ,
2593        MatNorm_MPIAIJ,
2594 /*20*/ MatAssemblyBegin_MPIAIJ,
2595        MatAssemblyEnd_MPIAIJ,
2596        MatSetOption_MPIAIJ,
2597        MatZeroEntries_MPIAIJ,
2598 /*24*/ MatZeroRows_MPIAIJ,
2599        0,
2600 #ifdef PETSC_HAVE_PBGL
2601        0,
2602 #else
2603        0,
2604 #endif
2605        0,
2606        0,
2607 /*29*/ MatSetUpPreallocation_MPIAIJ,
2608 #ifdef PETSC_HAVE_PBGL
2609        0,
2610 #else
2611        0,
2612 #endif
2613        0,
2614        0,
2615        0,
2616 /*34*/ MatDuplicate_MPIAIJ,
2617        0,
2618        0,
2619        0,
2620        0,
2621 /*39*/ MatAXPY_MPIAIJ,
2622        MatGetSubMatrices_MPIAIJ,
2623        MatIncreaseOverlap_MPIAIJ,
2624        MatGetValues_MPIAIJ,
2625        MatCopy_MPIAIJ,
2626 /*44*/ MatGetRowMax_MPIAIJ,
2627        MatScale_MPIAIJ,
2628        0,
2629        0,
2630        0,
2631 /*49*/ MatSetBlockSize_MPIAIJ,
2632        0,
2633        0,
2634        0,
2635        0,
2636 /*54*/ MatFDColoringCreate_MPIAIJ,
2637        0,
2638        MatSetUnfactored_MPIAIJ,
2639        MatPermute_MPIAIJ,
2640        0,
2641 /*59*/ MatGetSubMatrix_MPIAIJ,
2642        MatDestroy_MPIAIJ,
2643        MatView_MPIAIJ,
2644        0,
2645        0,
2646 /*64*/ 0,
2647        0,
2648        0,
2649        0,
2650        0,
2651 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2652        MatGetRowMinAbs_MPIAIJ,
2653        0,
2654        MatSetColoring_MPIAIJ,
2655 #if defined(PETSC_HAVE_ADIC)
2656        MatSetValuesAdic_MPIAIJ,
2657 #else
2658        0,
2659 #endif
2660        MatSetValuesAdifor_MPIAIJ,
2661 /*75*/ MatFDColoringApply_AIJ,
2662        0,
2663        0,
2664        0,
2665        0,
2666 /*80*/ 0,
2667        0,
2668        0,
2669 /*83*/ MatLoad_MPIAIJ,
2670        0,
2671        0,
2672        0,
2673        0,
2674        0,
2675 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2676        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2677        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2678        MatPtAP_Basic,
2679        MatPtAPSymbolic_MPIAIJ,
2680 /*94*/ MatPtAPNumeric_MPIAIJ,
2681        0,
2682        0,
2683        0,
2684        0,
2685 /*99*/ 0,
2686        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2687        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2688        MatConjugate_MPIAIJ,
2689        0,
2690 /*104*/MatSetValuesRow_MPIAIJ,
2691        MatRealPart_MPIAIJ,
2692        MatImaginaryPart_MPIAIJ,
2693        0,
2694        0,
2695 /*109*/0,
2696        MatGetRedundantMatrix_MPIAIJ,
2697        MatGetRowMin_MPIAIJ,
2698        0,
2699        0,
2700 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2701        0,
2702        0,
2703        0,
2704        0,
2705        0
2706 };
2707 
2708 /* ----------------------------------------------------------------------------------------*/
2709 
2710 EXTERN_C_BEGIN
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2713 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2714 {
2715   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2716   PetscErrorCode ierr;
2717 
2718   PetscFunctionBegin;
2719   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2720   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2721   PetscFunctionReturn(0);
2722 }
2723 EXTERN_C_END
2724 
2725 EXTERN_C_BEGIN
2726 #undef __FUNCT__
2727 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2728 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2729 {
2730   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2731   PetscErrorCode ierr;
2732 
2733   PetscFunctionBegin;
2734   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2735   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2736   PetscFunctionReturn(0);
2737 }
2738 EXTERN_C_END
2739 
2740 #include "petscpc.h"
2741 EXTERN_C_BEGIN
2742 #undef __FUNCT__
2743 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2744 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2745 {
2746   Mat_MPIAIJ     *b;
2747   PetscErrorCode ierr;
2748   PetscInt       i;
2749 
2750   PetscFunctionBegin;
2751   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2752   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2753   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2754   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2755 
2756   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2757   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2758   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2759   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2760   if (d_nnz) {
2761     for (i=0; i<B->rmap->n; i++) {
2762       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2763     }
2764   }
2765   if (o_nnz) {
2766     for (i=0; i<B->rmap->n; i++) {
2767       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2768     }
2769   }
2770   b = (Mat_MPIAIJ*)B->data;
2771 
2772   if (!B->preallocated) {
2773     /* Explicitly create 2 MATSEQAIJ matrices. */
2774     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2775     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2776     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2777     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2778     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2779     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2780     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2781     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2782   }
2783 
2784   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2785   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2786   B->preallocated = PETSC_TRUE;
2787   PetscFunctionReturn(0);
2788 }
2789 EXTERN_C_END
2790 
2791 #undef __FUNCT__
2792 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2793 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2794 {
2795   Mat            mat;
2796   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2797   PetscErrorCode ierr;
2798 
2799   PetscFunctionBegin;
2800   *newmat       = 0;
2801   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2802   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2803   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2804   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2805   a    = (Mat_MPIAIJ*)mat->data;
2806 
2807   mat->factor       = matin->factor;
2808   mat->rmap->bs      = matin->rmap->bs;
2809   mat->assembled    = PETSC_TRUE;
2810   mat->insertmode   = NOT_SET_VALUES;
2811   mat->preallocated = PETSC_TRUE;
2812 
2813   a->size           = oldmat->size;
2814   a->rank           = oldmat->rank;
2815   a->donotstash     = oldmat->donotstash;
2816   a->roworiented    = oldmat->roworiented;
2817   a->rowindices     = 0;
2818   a->rowvalues      = 0;
2819   a->getrowactive   = PETSC_FALSE;
2820 
2821   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2822   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2823 
2824   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2825   if (oldmat->colmap) {
2826 #if defined (PETSC_USE_CTABLE)
2827     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2828 #else
2829     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2830     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2831     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2832 #endif
2833   } else a->colmap = 0;
2834   if (oldmat->garray) {
2835     PetscInt len;
2836     len  = oldmat->B->cmap->n;
2837     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2838     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2839     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2840   } else a->garray = 0;
2841 
2842   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2843   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2844   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2845   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2846   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2847   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2848   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2849   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2850   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2851   *newmat = mat;
2852   PetscFunctionReturn(0);
2853 }
2854 
2855 #include "petscsys.h"
2856 
2857 #undef __FUNCT__
2858 #define __FUNCT__ "MatLoad_MPIAIJ"
2859 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2860 {
2861   Mat            A;
2862   PetscScalar    *vals,*svals;
2863   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2864   MPI_Status     status;
2865   PetscErrorCode ierr;
2866   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2867   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2868   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2869   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2870   PetscInt       cend,cstart,n,*rowners;
2871   int            fd;
2872 
2873   PetscFunctionBegin;
2874   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2875   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2876   if (!rank) {
2877     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2878     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2879     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2880   }
2881 
2882   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2883   M = header[1]; N = header[2];
2884   /* determine ownership of all rows */
2885   m    = M/size + ((M % size) > rank);
2886   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2887   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2888 
2889   /* First process needs enough room for process with most rows */
2890   if (!rank) {
2891     mmax       = rowners[1];
2892     for (i=2; i<=size; i++) {
2893       mmax = PetscMax(mmax,rowners[i]);
2894     }
2895   } else mmax = m;
2896 
2897   rowners[0] = 0;
2898   for (i=2; i<=size; i++) {
2899     rowners[i] += rowners[i-1];
2900   }
2901   rstart = rowners[rank];
2902   rend   = rowners[rank+1];
2903 
2904   /* distribute row lengths to all processors */
2905   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2906   if (!rank) {
2907     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2908     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2909     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2910     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2911     for (j=0; j<m; j++) {
2912       procsnz[0] += ourlens[j];
2913     }
2914     for (i=1; i<size; i++) {
2915       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2916       /* calculate the number of nonzeros on each processor */
2917       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2918         procsnz[i] += rowlengths[j];
2919       }
2920       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2921       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2922     }
2923     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2924   } else {
2925     mpicnt = PetscMPIIntCast(m);
2926     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2927   }
2928 
2929   if (!rank) {
2930     /* determine max buffer needed and allocate it */
2931     maxnz = 0;
2932     for (i=0; i<size; i++) {
2933       maxnz = PetscMax(maxnz,procsnz[i]);
2934     }
2935     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2936 
2937     /* read in my part of the matrix column indices  */
2938     nz   = procsnz[0];
2939     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2940     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2941 
2942     /* read in everyone else's column indices and ship them off */
2943     for (i=1; i<size; i++) {
2944       nz     = procsnz[i];
2945       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2946       mpicnt = PetscMPIIntCast(nz);
2947       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2948     }
2949     ierr = PetscFree(cols);CHKERRQ(ierr);
2950   } else {
2951     /* determine buffer space needed for message */
2952     nz = 0;
2953     for (i=0; i<m; i++) {
2954       nz += ourlens[i];
2955     }
2956     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2957 
2958     /* receive message of column indices*/
2959     mpicnt = PetscMPIIntCast(nz);
2960     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2961     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2962     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2963     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2964     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2965   }
2966 
2967   /* determine column ownership if matrix is not square */
2968   if (N != M) {
2969     n      = N/size + ((N % size) > rank);
2970     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2971     cstart = cend - n;
2972   } else {
2973     cstart = rstart;
2974     cend   = rend;
2975     n      = cend - cstart;
2976   }
2977 
2978   /* loop over local rows, determining number of off diagonal entries */
2979   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2980   jj = 0;
2981   for (i=0; i<m; i++) {
2982     for (j=0; j<ourlens[i]; j++) {
2983       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2984       jj++;
2985     }
2986   }
2987 
2988   /* create our matrix */
2989   for (i=0; i<m; i++) {
2990     ourlens[i] -= offlens[i];
2991   }
2992   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2993   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2994   ierr = MatSetType(A,type);CHKERRQ(ierr);
2995   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2996 
2997   for (i=0; i<m; i++) {
2998     ourlens[i] += offlens[i];
2999   }
3000 
3001   if (!rank) {
3002     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3003 
3004     /* read in my part of the matrix numerical values  */
3005     nz   = procsnz[0];
3006     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3007 
3008     /* insert into matrix */
3009     jj      = rstart;
3010     smycols = mycols;
3011     svals   = vals;
3012     for (i=0; i<m; i++) {
3013       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3014       smycols += ourlens[i];
3015       svals   += ourlens[i];
3016       jj++;
3017     }
3018 
3019     /* read in the other processes' values and ship them out */
3020     for (i=1; i<size; i++) {
3021       nz     = procsnz[i];
3022       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3023       mpicnt = PetscMPIIntCast(nz);
3024       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3025     }
3026     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3027   } else {
3028     /* receive numeric values */
3029     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3030 
3031     /* receive message of values*/
3032     mpicnt = PetscMPIIntCast(nz);
3033     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3034     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3035     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3036     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3037     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3038 
3039     /* insert into matrix */
3040     jj      = rstart;
3041     smycols = mycols;
3042     svals   = vals;
3043     for (i=0; i<m; i++) {
3044       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3045       smycols += ourlens[i];
3046       svals   += ourlens[i];
3047       jj++;
3048     }
3049   }
3050   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3051   ierr = PetscFree(vals);CHKERRQ(ierr);
3052   ierr = PetscFree(mycols);CHKERRQ(ierr);
3053   ierr = PetscFree(rowners);CHKERRQ(ierr);
3054 
3055   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3056   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3057   *newmat = A;
3058   PetscFunctionReturn(0);
3059 }
3060 
3061 #undef __FUNCT__
3062 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3063 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3064 {
3065   PetscErrorCode ierr;
3066   IS             iscol_local;
3067   PetscInt       csize;
3068 
3069   PetscFunctionBegin;
3070   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3071   if (call == MAT_REUSE_MATRIX) {
3072     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3073     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3074   } else {
3075     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3076   }
3077   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3078   if (call == MAT_INITIAL_MATRIX) {
3079     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3080     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3081   }
3082   PetscFunctionReturn(0);
3083 }
3084 
3085 #undef __FUNCT__
3086 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3087 /*
3088     Not great since it makes two copies of the submatrix: first a sequential
3089   SeqAIJ copy on each process, and then the final result is obtained by
3090   concatenating those local pieces. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3091 
3092   Note: This requires a sequential iscol with all indices.
3093 */
3094 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3095 {
3096   PetscErrorCode ierr;
3097   PetscMPIInt    rank,size;
3098   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3099   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3100   Mat            *local,M,Mreuse;
3101   MatScalar      *vwork,*aa;
3102   MPI_Comm       comm = ((PetscObject)mat)->comm;
3103   Mat_SeqAIJ     *aij;
3104 
3105 
3106   PetscFunctionBegin;
3107   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3108   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3109 
3110   if (call ==  MAT_REUSE_MATRIX) {
3111     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3112     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3113     local = &Mreuse;
3114     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3115   } else {
3116     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3117     Mreuse = *local;
3118     ierr   = PetscFree(local);CHKERRQ(ierr);
3119   }
3120 
3121   /*
3122       m - number of local rows
3123       n - number of columns (same on all processors)
3124       rstart - first row in new global matrix generated
3125   */
3126   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3127   if (call == MAT_INITIAL_MATRIX) {
3128     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3129     ii  = aij->i;
3130     jj  = aij->j;
3131 
3132     /*
3133         Determine the number of non-zeros in the diagonal and off-diagonal
3134         portions of the matrix in order to do correct preallocation
3135     */
3136 
3137     /* first get start and end of "diagonal" columns */
3138     if (csize == PETSC_DECIDE) {
3139       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3140       if (mglobal == n) { /* square matrix */
3141 	nlocal = m;
3142       } else {
3143         nlocal = n/size + ((n % size) > rank);
3144       }
3145     } else {
3146       nlocal = csize;
3147     }
3148     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3149     rstart = rend - nlocal;
3150     if (rank == size - 1 && rend != n) {
3151       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3152     }
3153 
3154     /* next, compute all the lengths */
3155     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3156     olens = dlens + m;
3157     for (i=0; i<m; i++) {
3158       jend = ii[i+1] - ii[i];
3159       olen = 0;
3160       dlen = 0;
3161       for (j=0; j<jend; j++) {
3162         if (*jj < rstart || *jj >= rend) olen++;
3163         else dlen++;
3164         jj++;
3165       }
3166       olens[i] = olen;
3167       dlens[i] = dlen;
3168     }
3169     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3170     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3171     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3172     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3173     ierr = PetscFree(dlens);CHKERRQ(ierr);
3174   } else {
3175     PetscInt ml,nl;
3176 
3177     M = *newmat;
3178     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3179     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3180     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3181     /*
3182          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3183        rather than the slower MatSetValues().
3184     */
3185     M->was_assembled = PETSC_TRUE;
3186     M->assembled     = PETSC_FALSE;
3187   }
3188   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3189   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3190   ii  = aij->i;
3191   jj  = aij->j;
3192   aa  = aij->a;
3193   for (i=0; i<m; i++) {
3194     row   = rstart + i;
3195     nz    = ii[i+1] - ii[i];
3196     cwork = jj;     jj += nz;
3197     vwork = aa;     aa += nz;
3198     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3199   }
3200 
3201   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3202   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3203   *newmat = M;
3204 
3205   /* save submatrix used in processor for next request */
3206   if (call ==  MAT_INITIAL_MATRIX) {
3207     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3208     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3209   }
3210 
3211   PetscFunctionReturn(0);
3212 }
3213 
3214 EXTERN_C_BEGIN
3215 #undef __FUNCT__
3216 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3217 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3218 {
3219   PetscInt       m,cstart, cend,j,nnz,i,d;
3220   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3221   const PetscInt *JJ;
3222   PetscScalar    *values;
3223   PetscErrorCode ierr;
3224 
3225   PetscFunctionBegin;
3226   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3227 
3228   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3229   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3230   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3231   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3232   m      = B->rmap->n;
3233   cstart = B->cmap->rstart;
3234   cend   = B->cmap->rend;
3235   rstart = B->rmap->rstart;
3236 
3237   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3238   o_nnz = d_nnz + m;
3239 
3240 #if defined(PETSC_USE_DEBUG)
3241   for (i=0; i<m; i++) {
3242     nnz     = Ii[i+1]- Ii[i];
3243     JJ      = J + Ii[i];
3244     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3245     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3246     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3247     for (j=1; j<nnz; j++) {
3248       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3249     }
3250   }
3251 #endif
3252 
3253   for (i=0; i<m; i++) {
3254     nnz     = Ii[i+1]- Ii[i];
3255     JJ      = J + Ii[i];
3256     nnz_max = PetscMax(nnz_max,nnz);
3257     for (j=0; j<nnz; j++) {
3258       if (*JJ >= cstart) break;
3259       JJ++;
3260     }
3261     d = 0;
3262     for (; j<nnz; j++) {
3263       if (*JJ++ >= cend) break;
3264       d++;
3265     }
3266     d_nnz[i] = d;
3267     o_nnz[i] = nnz - d;
3268   }
3269   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3270   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3271 
3272   if (v) values = (PetscScalar*)v;
3273   else {
3274     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3275     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3276   }
3277 
3278   for (i=0; i<m; i++) {
3279     ii   = i + rstart;
3280     nnz  = Ii[i+1]- Ii[i];
3281     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3282   }
3283   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3284   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3285 
3286   if (!v) {
3287     ierr = PetscFree(values);CHKERRQ(ierr);
3288   }
3289   PetscFunctionReturn(0);
3290 }
3291 EXTERN_C_END
3292 
3293 #undef __FUNCT__
3294 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3295 /*@
3296    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3297    (the default parallel PETSc format).
3298 
3299    Collective on MPI_Comm
3300 
3301    Input Parameters:
3302 +  B - the matrix
3303 .  i - the indices into j for the start of each local row (starts with zero)
3304 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3305 -  v - optional values in the matrix
3306 
3307    Level: developer
3308 
3309    Notes:
3310        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3311      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3312      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3313 
3314        The i and j indices are zero-based, and the i indices are offsets into the local j array.
3315 
3316        The format used for the sparse matrix input is equivalent to a row-major
3317     ordering, i.e. for the following matrix the expected input data is as
3318     shown:
3319 
3320         1 0 0
3321         2 0 3     P0
3322        -------
3323         4 5 6     P1
3324 
3325      Process0 [P0]: rows_owned=[0,1]
3326         i =  {0,1,3}  [size = nrow+1  = 2+1]
3327         j =  {0,0,2}  [size = nz = 3]
3328         v =  {1,2,3}  [size = nz = 3]
3329 
3330      Process1 [P1]: rows_owned=[2]
3331         i =  {0,3}    [size = nrow+1  = 1+1]
3332         j =  {0,1,2}  [size = nz = 3]
3333         v =  {4,5,6}  [size = nz = 3]
3334 
3335       The column indices for each row MUST be sorted.
3336 
3337 .keywords: matrix, aij, compressed row, sparse, parallel
3338 
3339 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3340           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3341 @*/
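/*
   A minimal usage sketch for the 3x3 example above, as process 0 (which owns
   rows 0 and 1) might call it; the matrix B, ierr, and the array names are
   hypothetical, taken from a typical calling context rather than this file.

     PetscInt    csr_i[] = {0,1,3};
     PetscInt    csr_j[] = {0,0,2};
     PetscScalar csr_v[] = {1.0,2.0,3.0};

     ierr = MatMPIAIJSetPreallocationCSR(B,csr_i,csr_j,csr_v);CHKERRQ(ierr);
*/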
3342 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3343 {
3344   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3345 
3346   PetscFunctionBegin;
3347   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3348   if (f) {
3349     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3350   }
3351   PetscFunctionReturn(0);
3352 }
3353 
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3356 /*@C
3357    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3358    (the default parallel PETSc format).  For good matrix assembly performance
3359    the user should preallocate the matrix storage by setting the parameters
3360    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3361    performance can be increased by more than a factor of 50.
3362 
3363    Collective on MPI_Comm
3364 
3365    Input Parameters:
3366 +  A - the matrix
3367 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3368            (same value is used for all local rows)
3369 .  d_nnz - array containing the number of nonzeros in the various rows of the
3370            DIAGONAL portion of the local submatrix (possibly different for each row)
3371            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3372            The size of this array is equal to the number of local rows, i.e 'm'.
3373            You must leave room for the diagonal entry even if it is zero.
3374 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3375            submatrix (same value is used for all local rows).
3376 -  o_nnz - array containing the number of nonzeros in the various rows of the
3377            OFF-DIAGONAL portion of the local submatrix (possibly different for
3378            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3379            structure. The size of this array is equal to the number
3380            of local rows, i.e 'm'.
3381 
3382    If the *_nnz parameter is given then the *_nz parameter is ignored
3383 
3384    The AIJ format (also called the Yale sparse matrix format or
3385    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3386    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3387 
3388    The parallel matrix is partitioned such that the first m0 rows belong to
3389    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3390    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3391 
3392    The DIAGONAL portion of the local submatrix of a processor can be defined
3393    as the submatrix which is obtained by extracting the part corresponding
3394    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3395    first row that belongs to the processor, and r2 is the last row belonging
3396    to this processor. This is a square mxm matrix. The remaining portion
3397    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3398 
3399    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3400 
3401    You can call MatGetInfo() to get information on how effective the preallocation was;
3402    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3403    You can also run with the option -info and look for messages with the string
3404    malloc in them to see if additional memory allocation was needed.
3405 
3406    Example usage:
3407 
3408    Consider the following 8x8 matrix with 34 non-zero values, that is
3409    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3410    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3411    as follows:
3412 
3413 .vb
3414             1  2  0  |  0  3  0  |  0  4
3415     Proc0   0  5  6  |  7  0  0  |  8  0
3416             9  0 10  | 11  0  0  | 12  0
3417     -------------------------------------
3418            13  0 14  | 15 16 17  |  0  0
3419     Proc1   0 18  0  | 19 20 21  |  0  0
3420             0  0  0  | 22 23  0  | 24  0
3421     -------------------------------------
3422     Proc2  25 26 27  |  0  0 28  | 29  0
3423            30  0  0  | 31 32 33  |  0 34
3424 .ve
3425 
3426    This can be represented as a collection of submatrices as:
3427 
3428 .vb
3429       A B C
3430       D E F
3431       G H I
3432 .ve
3433 
3434    Where the submatrices A,B,C are owned by proc0, D,E,F are
3435    owned by proc1, G,H,I are owned by proc2.
3436 
3437    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3438    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3439    The 'M','N' parameters are 8,8, and have the same values on all procs.
3440 
3441    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3442    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3443    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3444    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3445    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3446    matrix, and [DF] as another SeqAIJ matrix.
3447 
3448    When d_nz, o_nz parameters are specified, d_nz storage elements are
3449    allocated for every row of the local diagonal submatrix, and o_nz
3450    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3451    One way to choose d_nz and o_nz is to use the max nonzeros per local
3452    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3453    In this case, the values of d_nz,o_nz are:
3454 .vb
3455      proc0 : dnz = 2, o_nz = 2
3456      proc1 : dnz = 3, o_nz = 2
3457      proc2 : dnz = 1, o_nz = 4
3458 .ve
3459    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3460    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3461    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3462    34 values.
3463 
3464    When d_nnz, o_nnz parameters are specified, the storage is specified
3465    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3466    In the above case the values for d_nnz,o_nnz are:
3467 .vb
3468      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3469      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3470      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3471 .ve
3472    Here the space allocated is the sum of all the above values, i.e. 34, and
3473    hence the preallocation is perfect.
3474 
3475    Level: intermediate
3476 
3477 .keywords: matrix, aij, compressed row, sparse, parallel
3478 
3479 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3480           MPIAIJ, MatGetInfo()
3481 @*/
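/*
   A minimal usage sketch for proc0 of the 8x8 example above, using the exact
   per-row counts listed there; A, ierr, and the array names are hypothetical,
   taken from a typical calling context rather than this file.

     PetscInt d_nnz[] = {2,2,2};
     PetscInt o_nnz[] = {2,2,2};

     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/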
3482 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3483 {
3484   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3485 
3486   PetscFunctionBegin;
3487   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3488   if (f) {
3489     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3490   }
3491   PetscFunctionReturn(0);
3492 }
3493 
3494 #undef __FUNCT__
3495 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3496 /*@
3497      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3498          CSR format the local rows.
3499 
3500    Collective on MPI_Comm
3501 
3502    Input Parameters:
3503 +  comm - MPI communicator
3504 .  m - number of local rows (Cannot be PETSC_DECIDE)
3505 .  n - This value should be the same as the local size used in creating the
3506        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3507        calculated if N is given) For square matrices n is almost always m.
3508 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3509 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3510 .   i - row indices
3511 .   j - column indices
3512 -   a - matrix values
3513 
3514    Output Parameter:
3515 .   mat - the matrix
3516 
3517    Level: intermediate
3518 
3519    Notes:
3520        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3521      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3522      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3523 
3524        The i and j indices are zero-based, and the i indices are offsets into the local j array.
3525 
3526        The format used for the sparse matrix input is equivalent to a row-major
3527     ordering, i.e. for the following matrix the expected input data is as
3528     shown:
3529 
3530         1 0 0
3531         2 0 3     P0
3532        -------
3533         4 5 6     P1
3534 
3535      Process0 [P0]: rows_owned=[0,1]
3536         i =  {0,1,3}  [size = nrow+1  = 2+1]
3537         j =  {0,0,2}  [size = nz = 3]
3538         v =  {1,2,3}  [size = nz = 3]
3539 
3540      Process1 [P1]: rows_owned=[2]
3541         i =  {0,3}    [size = nrow+1  = 1+1]
3542         j =  {0,1,2}  [size = nz = 3]
3543         v =  {4,5,6}  [size = nz = 3]
3544 
3545 .keywords: matrix, aij, compressed row, sparse, parallel
3546 
3547 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3548           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3549 @*/
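/*
   A minimal usage sketch for the 3x3 example above, as process 1 (which owns
   row 2) might call it; every process passes only its own local rows. Here
   M = N = 3 and n is left to PETSC_DECIDE; comm, C, ierr, and the array names
   are hypothetical, taken from a typical calling context rather than this file.

     PetscInt    csr_i[] = {0,3};
     PetscInt    csr_j[] = {0,1,2};
     PetscScalar csr_v[] = {4.0,5.0,6.0};
     Mat         C;

     ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,csr_i,csr_j,csr_v,&C);CHKERRQ(ierr);
*/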
3550 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3551 {
3552   PetscErrorCode ierr;
3553 
3554   PetscFunctionBegin;
3555   if (i[0]) {
3556     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3557   }
3558   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3559   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3560   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3561   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3562   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3563   PetscFunctionReturn(0);
3564 }
3565 
3566 #undef __FUNCT__
3567 #define __FUNCT__ "MatCreateMPIAIJ"
3568 /*@C
3569    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3570    (the default parallel PETSc format).  For good matrix assembly performance
3571    the user should preallocate the matrix storage by setting the parameters
3572    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3573    performance can be increased by more than a factor of 50.
3574 
3575    Collective on MPI_Comm
3576 
3577    Input Parameters:
3578 +  comm - MPI communicator
3579 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3580            This value should be the same as the local size used in creating the
3581            y vector for the matrix-vector product y = Ax.
3582 .  n - This value should be the same as the local size used in creating the
3583        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3584        calculated if N is given) For square matrices n is almost always m.
3585 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3586 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3587 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3588            (same value is used for all local rows)
3589 .  d_nnz - array containing the number of nonzeros in the various rows of the
3590            DIAGONAL portion of the local submatrix (possibly different for each row)
3591            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3592            The size of this array is equal to the number of local rows, i.e 'm'.
3593            You must leave room for the diagonal entry even if it is zero.
3594 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3595            submatrix (same value is used for all local rows).
3596 -  o_nnz - array containing the number of nonzeros in the various rows of the
3597            OFF-DIAGONAL portion of the local submatrix (possibly different for
3598            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3599            structure. The size of this array is equal to the number
3600            of local rows, i.e 'm'.
3601 
3602    Output Parameter:
3603 .  A - the matrix
3604 
3605    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3606    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3607    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3608 
3609    Notes:
3610    If the *_nnz parameter is given then the *_nz parameter is ignored
3611 
3612    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3613    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3614    storage requirements for this matrix.
3615 
3616    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3617    processor than it must be used on all processors that share the object for
3618    that argument.
3619 
3620    The user MUST specify either the local or global matrix dimensions
3621    (possibly both).
3622 
3623    The parallel matrix is partitioned across processors such that the
3624    first m0 rows belong to process 0, the next m1 rows belong to
3625    process 1, the next m2 rows belong to process 2 etc.. where
3626    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3627    values corresponding to [m x N] submatrix.
3628 
3629    The columns are logically partitioned with the n0 columns belonging
3630    to 0th partition, the next n1 columns belonging to the next
3631    partition etc. where n0,n1,n2... are the input parameter 'n'.
3632 
3633    The DIAGONAL portion of the local submatrix on any given processor
3634    is the submatrix corresponding to the rows and columns m,n
3635    corresponding to the given processor. i.e diagonal matrix on
3636    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3637    etc. The remaining portion of the local submatrix [m x (N-n)]
3638    constitute the OFF-DIAGONAL portion. The example below better
3639    illustrates this concept.
3640 
3641    For a square global matrix we define each processor's diagonal portion
3642    to be its local rows and the corresponding columns (a square submatrix);
3643    each processor's off-diagonal portion encompasses the remainder of the
3644    local matrix (a rectangular submatrix).
3645 
3646    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3647 
3648    When calling this routine with a single process communicator, a matrix of
3649    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3650    type of communicator, use the construction mechanism:
3651      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3652 
3653    By default, this format uses inodes (identical nodes) when possible.
3654    We search for consecutive rows with the same nonzero structure, thereby
3655    reusing matrix information to achieve increased efficiency.
3656 
3657    Options Database Keys:
3658 +  -mat_no_inode  - Do not use inodes
3659 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3660 -  -mat_aij_oneindex - Internally use indexing starting at 1
3661         rather than 0.  Note that when calling MatSetValues(),
3662         the user still MUST index entries starting at 0!
3663 
3664 
3665    Example usage:
3666 
3667    Consider the following 8x8 matrix with 34 non-zero values, that is
3668    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3669    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3670    as follows:
3671 
3672 .vb
3673             1  2  0  |  0  3  0  |  0  4
3674     Proc0   0  5  6  |  7  0  0  |  8  0
3675             9  0 10  | 11  0  0  | 12  0
3676     -------------------------------------
3677            13  0 14  | 15 16 17  |  0  0
3678     Proc1   0 18  0  | 19 20 21  |  0  0
3679             0  0  0  | 22 23  0  | 24  0
3680     -------------------------------------
3681     Proc2  25 26 27  |  0  0 28  | 29  0
3682            30  0  0  | 31 32 33  |  0 34
3683 .ve
3684 
3685    This can be represented as a collection of submatrices as:
3686 
3687 .vb
3688       A B C
3689       D E F
3690       G H I
3691 .ve
3692 
3693    Where the submatrices A,B,C are owned by proc0, D,E,F are
3694    owned by proc1, G,H,I are owned by proc2.
3695 
3696    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3697    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3698    The 'M','N' parameters are 8,8, and have the same values on all procs.
3699 
3700    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3701    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3702    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3703    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3704    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3705    matrix, and [DF] as another SeqAIJ matrix.
3706 
3707    When d_nz, o_nz parameters are specified, d_nz storage elements are
3708    allocated for every row of the local diagonal submatrix, and o_nz
3709    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3710    One way to choose d_nz and o_nz is to use the max nonzeros per local
3711    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3712    In this case, the values of d_nz,o_nz are:
3713 .vb
3714      proc0 : dnz = 2, o_nz = 2
3715      proc1 : dnz = 3, o_nz = 2
3716      proc2 : dnz = 1, o_nz = 4
3717 .ve
3718    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3719    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3720    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3721    34 values.
3722 
3723    When d_nnz, o_nnz parameters are specified, the storage is specified
3724    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3725    In the above case the values for d_nnz,o_nnz are:
3726 .vb
3727      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3728      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3729      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3730 .ve
3731    Here the space allocated is the sum of all the above values, i.e. 34, and
3732    hence the preallocation is perfect.
3733 
3734    Level: intermediate
3735 
3736 .keywords: matrix, aij, compressed row, sparse, parallel
3737 
3738 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3739           MPIAIJ, MatCreateMPIAIJWithArrays()
3740 @*/
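/*
   A minimal usage sketch of the single-value d_nz/o_nz form, as proc1 of the
   8x8 example above might call it (d_nz = 3, o_nz = 2 as listed there); comm,
   A, and ierr are hypothetical names from a typical calling context rather
   than this file.

     Mat A;

     ierr = MatCreateMPIAIJ(comm,3,3,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/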
3741 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3742 {
3743   PetscErrorCode ierr;
3744   PetscMPIInt    size;
3745 
3746   PetscFunctionBegin;
3747   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3748   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3749   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3750   if (size > 1) {
3751     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3752     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3753   } else {
3754     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3755     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3756   }
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3762 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3763 {
3764   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3765 
3766   PetscFunctionBegin;
3767   *Ad     = a->A;
3768   *Ao     = a->B;
3769   *colmap = a->garray;
3770   PetscFunctionReturn(0);
3771 }
3772 
3773 #undef __FUNCT__
3774 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3775 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3776 {
3777   PetscErrorCode ierr;
3778   PetscInt       i;
3779   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3780 
3781   PetscFunctionBegin;
3782   if (coloring->ctype == IS_COLORING_GLOBAL) {
3783     ISColoringValue *allcolors,*colors;
3784     ISColoring      ocoloring;
3785 
3786     /* set coloring for diagonal portion */
3787     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3788 
3789     /* set coloring for off-diagonal portion */
3790     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3791     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3792     for (i=0; i<a->B->cmap->n; i++) {
3793       colors[i] = allcolors[a->garray[i]];
3794     }
3795     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3796     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3797     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3798     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3799   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3800     ISColoringValue *colors;
3801     PetscInt        *larray;
3802     ISColoring      ocoloring;
3803 
3804     /* set coloring for diagonal portion */
3805     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3806     for (i=0; i<a->A->cmap->n; i++) {
3807       larray[i] = i + A->cmap->rstart;
3808     }
3809     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3810     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3811     for (i=0; i<a->A->cmap->n; i++) {
3812       colors[i] = coloring->colors[larray[i]];
3813     }
3814     ierr = PetscFree(larray);CHKERRQ(ierr);
3815     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3816     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3817     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3818 
3819     /* set coloring for off-diagonal portion */
3820     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3821     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3822     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3823     for (i=0; i<a->B->cmap->n; i++) {
3824       colors[i] = coloring->colors[larray[i]];
3825     }
3826     ierr = PetscFree(larray);CHKERRQ(ierr);
3827     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3828     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3829     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3830   } else {
3831     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3832   }
3833 
3834   PetscFunctionReturn(0);
3835 }
3836 
3837 #if defined(PETSC_HAVE_ADIC)
3838 #undef __FUNCT__
3839 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3840 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3841 {
3842   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3843   PetscErrorCode ierr;
3844 
3845   PetscFunctionBegin;
3846   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3847   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3848   PetscFunctionReturn(0);
3849 }
3850 #endif
3851 
3852 #undef __FUNCT__
3853 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3854 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3855 {
3856   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3857   PetscErrorCode ierr;
3858 
3859   PetscFunctionBegin;
3860   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3861   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3862   PetscFunctionReturn(0);
3863 }
3864 
3865 #undef __FUNCT__
3866 #define __FUNCT__ "MatMerge"
3867 /*@
3868       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3869                  matrices from each processor
3870 
3871     Collective on MPI_Comm
3872 
3873    Input Parameters:
3874 +    comm - the communicator the parallel matrix will live on
3875 .    inmat - the input sequential matrices
3876 .    n - number of local columns (or PETSC_DECIDE)
3877 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3878 
3879    Output Parameter:
3880 .    outmat - the parallel matrix generated
3881 
3882     Level: advanced
3883 
3884    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3885 
3886 @*/
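/*
   A minimal usage sketch; each process contributes its own sequential matrix
   seqA (a hypothetical name, as is comm), and because MatMerge() calls
   MatDestroy() on inmat (see below), seqA must not be used afterwards.

     Mat merged;

     ierr = MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);CHKERRQ(ierr);
*/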
3887 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3888 {
3889   PetscErrorCode ierr;
3890   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3891   PetscInt       *indx;
3892   PetscScalar    *values;
3893 
3894   PetscFunctionBegin;
3895   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3896   if (scall == MAT_INITIAL_MATRIX){
3897     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3898     if (n == PETSC_DECIDE){
3899       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3900     }
3901     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3902     rstart -= m;
3903 
3904     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3905     for (i=0;i<m;i++) {
3906       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3907       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3908       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3909     }
3910     /* This routine will ONLY return MPIAIJ type matrix */
3911     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3912     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3913     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3914     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3915     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3916 
3917   } else if (scall == MAT_REUSE_MATRIX){
3918     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3919   } else {
3920     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3921   }
3922 
3923   for (i=0;i<m;i++) {
3924     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3925     Ii    = i + rstart;
3926     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3927     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3928   }
3929   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3930   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3931   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3932 
3933   PetscFunctionReturn(0);
3934 }
3935 
3936 #undef __FUNCT__
3937 #define __FUNCT__ "MatFileSplit"
3938 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3939 {
3940   PetscErrorCode    ierr;
3941   PetscMPIInt       rank;
3942   PetscInt          m,N,i,rstart,nnz;
3943   size_t            len;
3944   const PetscInt    *indx;
3945   PetscViewer       out;
3946   char              *name;
3947   Mat               B;
3948   const PetscScalar *values;
3949 
3950   PetscFunctionBegin;
3951   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3952   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3953   /* Should this be the type of the diagonal block of A? */
3954   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3955   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3956   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3957   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3958   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3959   for (i=0;i<m;i++) {
3960     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3961     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3962     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3963   }
3964   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3965   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3966 
3967   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3968   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3969   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3970   sprintf(name,"%s.%d",outfile,rank);
3971   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3972   ierr = PetscFree(name);CHKERRQ(ierr);
3973   ierr = MatView(B,out);CHKERRQ(ierr);
3974   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3975   ierr = MatDestroy(B);CHKERRQ(ierr);
3976   PetscFunctionReturn(0);
3977 }
3978 
3979 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3980 #undef __FUNCT__
3981 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3982 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3983 {
3984   PetscErrorCode       ierr;
3985   Mat_Merge_SeqsToMPI  *merge;
3986   PetscContainer       container;
3987 
3988   PetscFunctionBegin;
3989   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3990   if (container) {
3991     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
4003 
4004     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4005     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4006     ierr = PetscFree(merge);CHKERRQ(ierr);
4007   }
4008 
4009   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4010   PetscFunctionReturn(0);
4011 }
4012 
4013 #include "../src/mat/utils/freespace.h"
4014 #include "petscbt.h"
4015 
4016 #undef __FUNCT__
4017 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4018 /*@C
4019       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4020                  matrices from each processor
4021 
4022     Collective on MPI_Comm
4023 
4024    Input Parameters:
4025 +    comm - the communicator the parallel matrix will live on
4026 .    seqmat - the input sequential matrices
4027 .    m - number of local rows (or PETSC_DECIDE)
4028 .    n - number of local columns (or PETSC_DECIDE)
4029 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4030 
4031    Output Parameter:
4032 .    mpimat - the parallel matrix generated
4033 
4034     Level: advanced
4035 
4036    Notes:
4037      The dimensions of the sequential matrix in each processor MUST be the same.
4038      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4039      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4040 @*/
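/*
   A minimal usage sketch (not part of the PETSc test suite; "seqmat" and "C" are placeholder
   names): each process assembles its own SeqAIJ contribution with the SAME global dimensions,
   and the pieces are summed into one MPIAIJ matrix via the driver MatMerge_SeqsToMPI()
   defined further below.

     Mat seqmat,C;
     ... assemble seqmat as a SeqAIJ matrix of global size M x N on every process ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... use C; a later call with MAT_REUSE_MATRIX reuses the symbolic data attached to C ...
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/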
4041 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4042 {
4043   PetscErrorCode       ierr;
4044   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4045   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4046   PetscMPIInt          size,rank,taga,*len_s;
4047   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4048   PetscInt             proc,m;
4049   PetscInt             **buf_ri,**buf_rj;
4050   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4051   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4052   MPI_Request          *s_waits,*r_waits;
4053   MPI_Status           *status;
4054   MatScalar            *aa=a->a;
4055   MatScalar            **abuf_r,*ba_i;
4056   Mat_Merge_SeqsToMPI  *merge;
4057   PetscContainer       container;
4058 
4059   PetscFunctionBegin;
4060   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4061 
4062   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4063   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4064 
4065   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4066   if (container) {
4067     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4068   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat does not have an attached MatMergeSeqsToMPI container");
4069   bi     = merge->bi;
4070   bj     = merge->bj;
4071   buf_ri = merge->buf_ri;
4072   buf_rj = merge->buf_rj;
4073 
4074   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4075   owners = merge->rowmap.range;
4076   len_s  = merge->len_s;
4077 
4078   /* send and recv matrix values */
4079   /*-----------------------------*/
4080   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4081   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4082 
4083   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4084   for (proc=0,k=0; proc<size; proc++){
4085     if (!len_s[proc]) continue;
4086     i = owners[proc];
4087     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4088     k++;
4089   }
4090 
4091   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4092   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4093   ierr = PetscFree(status);CHKERRQ(ierr);
4094 
4095   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4096   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4097 
4098   /* insert mat values of mpimat */
4099   /*----------------------------*/
4100   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4101   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4102   nextrow = buf_ri_k + merge->nrecv;
4103   nextai  = nextrow + merge->nrecv;
4104 
4105   for (k=0; k<merge->nrecv; k++){
4106     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4107     nrows = *(buf_ri_k[k]);
4108     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4109     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4110   }
4111 
4112   /* set values of ba */
4113   m = merge->rowmap.n;
4114   for (i=0; i<m; i++) {
4115     arow = owners[rank] + i;
4116     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4117     bnzi = bi[i+1] - bi[i];
4118     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4119 
4120     /* add local non-zero vals of this proc's seqmat into ba */
4121     anzi = ai[arow+1] - ai[arow];
4122     aj   = a->j + ai[arow];
4123     aa   = a->a + ai[arow];
4124     nextaj = 0;
4125     for (j=0; nextaj<anzi; j++){
4126       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4127         ba_i[j] += aa[nextaj++];
4128       }
4129     }
4130 
4131     /* add received vals into ba */
4132     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4133       /* i-th row */
4134       if (i == *nextrow[k]) {
4135         anzi = *(nextai[k]+1) - *nextai[k];
4136         aj   = buf_rj[k] + *(nextai[k]);
4137         aa   = abuf_r[k] + *(nextai[k]);
4138         nextaj = 0;
4139         for (j=0; nextaj<anzi; j++){
4140           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4141             ba_i[j] += aa[nextaj++];
4142           }
4143         }
4144         nextrow[k]++; nextai[k]++;
4145       }
4146     }
4147     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4148   }
4149   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4150   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4151 
4152   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4153   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4154   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4155   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4156   PetscFunctionReturn(0);
4157 }
4158 
4159 #undef __FUNCT__
4160 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4161 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4162 {
4163   PetscErrorCode       ierr;
4164   Mat                  B_mpi;
4165   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4166   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4167   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4168   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4169   PetscInt             len,proc,*dnz,*onz;
4170   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4171   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4172   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4173   MPI_Status           *status;
4174   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4175   PetscBT              lnkbt;
4176   Mat_Merge_SeqsToMPI  *merge;
4177   PetscContainer       container;
4178 
4179   PetscFunctionBegin;
4180   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4181 
4182   /* make sure it is a PETSc comm */
4183   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4184   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4185   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4186 
4187   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4188   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4189 
4190   /* determine row ownership */
4191   /*---------------------------------------------------------*/
4192   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4193   merge->rowmap.n = m;
4194   merge->rowmap.N = M;
4195   merge->rowmap.bs = 1;
4196   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4197   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4198   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4199 
4200   m      = merge->rowmap.n;
4201   M      = merge->rowmap.N;
4202   owners = merge->rowmap.range;
4203 
4204   /* determine the number of messages to send, their lengths */
4205   /*---------------------------------------------------------*/
4206   len_s  = merge->len_s;
4207 
4208   len = 0;  /* length of buf_si[] */
4209   merge->nsend = 0;
4210   for (proc=0; proc<size; proc++){
4211     len_si[proc] = 0;
4212     if (proc == rank){
4213       len_s[proc] = 0;
4214     } else {
4215       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4216       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4217     }
4218     if (len_s[proc]) {
4219       merge->nsend++;
4220       nrows = 0;
4221       for (i=owners[proc]; i<owners[proc+1]; i++){
4222         if (ai[i+1] > ai[i]) nrows++;
4223       }
4224       len_si[proc] = 2*(nrows+1);
4225       len += len_si[proc];
4226     }
4227   }
4228 
4229   /* determine the number and length of messages to receive for ij-structure */
4230   /*-------------------------------------------------------------------------*/
4231   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4232   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4233 
4234   /* post the Irecv of j-structure */
4235   /*-------------------------------*/
4236   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4237   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4238 
4239   /* post the Isend of j-structure */
4240   /*--------------------------------*/
4241   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4242   sj_waits = si_waits + merge->nsend;
4243 
4244   for (proc=0, k=0; proc<size; proc++){
4245     if (!len_s[proc]) continue;
4246     i = owners[proc];
4247     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4248     k++;
4249   }
4250 
4251   /* receives and sends of j-structure are complete */
4252   /*------------------------------------------------*/
4253   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4254   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4255 
4256   /* send and recv i-structure */
4257   /*---------------------------*/
4258   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4259   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4260 
4261   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4262   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4263   for (proc=0,k=0; proc<size; proc++){
4264     if (!len_s[proc]) continue;
4265     /* form outgoing message for i-structure:
4266          buf_si[0]:                 nrows to be sent
4267                [1:nrows]:           row index (local to the receiving process)
4268                [nrows+1:2*nrows+1]: i-structure index
4269     */
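    /* Illustrative example (not taken from the code): if the only nonempty rows being sent are
       local rows 2 and 5 with 3 and 4 nonzeros respectively, then nrows = 2 and
       buf_si = {2, 2, 5, 0, 3, 7}, i.e. the row count, the two local row indices, and the
       prefix sums of the row lengths that form the i-structure. */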
4270     /*-------------------------------------------*/
4271     nrows = len_si[proc]/2 - 1;
4272     buf_si_i    = buf_si + nrows+1;
4273     buf_si[0]   = nrows;
4274     buf_si_i[0] = 0;
4275     nrows = 0;
4276     for (i=owners[proc]; i<owners[proc+1]; i++){
4277       anzi = ai[i+1] - ai[i];
4278       if (anzi) {
4279         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4280         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4281         nrows++;
4282       }
4283     }
4284     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4285     k++;
4286     buf_si += len_si[proc];
4287   }
4288 
4289   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4290   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4291 
4292   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4293   for (i=0; i<merge->nrecv; i++){
4294     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4295   }
4296 
4297   ierr = PetscFree(len_si);CHKERRQ(ierr);
4298   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4299   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4300   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4301   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4302   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4303   ierr = PetscFree(status);CHKERRQ(ierr);
4304 
4305   /* compute a local seq matrix in each processor */
4306   /*----------------------------------------------*/
4307   /* allocate bi array and free space for accumulating nonzero column info */
4308   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4309   bi[0] = 0;
4310 
4311   /* create and initialize a linked list */
4312   nlnk = N+1;
4313   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4314 
4315   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4316   len = 0;
4317   len  = ai[owners[rank+1]] - ai[owners[rank]];
4318   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4319   current_space = free_space;
4320 
4321   /* determine symbolic info for each local row */
4322   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4323   nextrow = buf_ri_k + merge->nrecv;
4324   nextai  = nextrow + merge->nrecv;
4325   for (k=0; k<merge->nrecv; k++){
4326     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4327     nrows = *buf_ri_k[k];
4328     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4329     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4330   }
4331 
4332   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4333   len = 0;
4334   for (i=0;i<m;i++) {
4335     bnzi   = 0;
4336     /* add local non-zero cols of this proc's seqmat into lnk */
4337     arow   = owners[rank] + i;
4338     anzi   = ai[arow+1] - ai[arow];
4339     aj     = a->j + ai[arow];
4340     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4341     bnzi += nlnk;
4342     /* add received col data into lnk */
4343     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4344       if (i == *nextrow[k]) { /* i-th row */
4345         anzi = *(nextai[k]+1) - *nextai[k];
4346         aj   = buf_rj[k] + *nextai[k];
4347         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4348         bnzi += nlnk;
4349         nextrow[k]++; nextai[k]++;
4350       }
4351     }
4352     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4353 
4354     /* if free space is not available, make more free space */
4355     if (current_space->local_remaining<bnzi) {
4356       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4357       nspacedouble++;
4358     }
4359     /* copy data into free space, then initialize lnk */
4360     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4361     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4362 
4363     current_space->array           += bnzi;
4364     current_space->local_used      += bnzi;
4365     current_space->local_remaining -= bnzi;
4366 
4367     bi[i+1] = bi[i] + bnzi;
4368   }
4369 
4370   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4371 
4372   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4373   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4374   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4375 
4376   /* create symbolic parallel matrix B_mpi */
4377   /*---------------------------------------*/
4378   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4379   if (n==PETSC_DECIDE) {
4380     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4381   } else {
4382     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4383   }
4384   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4385   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4386   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4387 
4388   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4389   B_mpi->assembled     = PETSC_FALSE;
4390   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4391   merge->bi            = bi;
4392   merge->bj            = bj;
4393   merge->buf_ri        = buf_ri;
4394   merge->buf_rj        = buf_rj;
4395   merge->coi           = PETSC_NULL;
4396   merge->coj           = PETSC_NULL;
4397   merge->owners_co     = PETSC_NULL;
4398 
4399   /* attach the supporting struct to B_mpi for reuse */
4400   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4401   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4402   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4403   *mpimat = B_mpi;
4404 
4405   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4406   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4407   PetscFunctionReturn(0);
4408 }
4409 
4410 #undef __FUNCT__
4411 #define __FUNCT__ "MatMerge_SeqsToMPI"
4412 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4413 {
4414   PetscErrorCode   ierr;
4415 
4416   PetscFunctionBegin;
4417   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4418   if (scall == MAT_INITIAL_MATRIX){
4419     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4420   }
4421   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4422   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4423   PetscFunctionReturn(0);
4424 }
4425 
4426 #undef __FUNCT__
4427 #define __FUNCT__ "MatGetLocalMat"
4428 /*@
4429      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of an MPIAIJ matrix A
4430 
4431     Not Collective
4432 
4433    Input Parameters:
4434 +    A - the matrix
4435 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4436 
4437    Output Parameter:
4438 .    A_loc - the local sequential matrix generated
4439 
4440     Level: developer
4441 
4442 @*/
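/*
   A minimal usage sketch (assumes A is an assembled MPIAIJ matrix; A_loc is a placeholder name):

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... A_loc now holds this process's rows of A with global column numbering ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ... the second call refreshes the numerical values only ...
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/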
4443 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4444 {
4445   PetscErrorCode  ierr;
4446   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4447   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4448   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4449   MatScalar       *aa=a->a,*ba=b->a,*cam;
4450   PetscScalar     *ca;
4451   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4452   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4453 
4454   PetscFunctionBegin;
4455   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4456   if (scall == MAT_INITIAL_MATRIX){
4457     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4458     ci[0] = 0;
4459     for (i=0; i<am; i++){
4460       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4461     }
4462     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4463     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4464     k = 0;
4465     for (i=0; i<am; i++) {
4466       ncols_o = bi[i+1] - bi[i];
4467       ncols_d = ai[i+1] - ai[i];
4468       /* off-diagonal portion of A */
4469       for (jo=0; jo<ncols_o; jo++) {
4470         col = cmap[*bj];
4471         if (col >= cstart) break;
4472         cj[k]   = col; bj++;
4473         ca[k++] = *ba++;
4474       }
4475       /* diagonal portion of A */
4476       for (j=0; j<ncols_d; j++) {
4477         cj[k]   = cstart + *aj++;
4478         ca[k++] = *aa++;
4479       }
4480       /* off-diagonal portion of A */
4481       for (j=jo; j<ncols_o; j++) {
4482         cj[k]   = cmap[*bj++];
4483         ca[k++] = *ba++;
4484       }
4485     }
4486     /* put together the new matrix */
4487     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4488     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4489     /* Since these are PETSc arrays, change flags to free them as necessary. */
4490     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4491     mat->free_a  = PETSC_TRUE;
4492     mat->free_ij = PETSC_TRUE;
4493     mat->nonew   = 0;
4494   } else if (scall == MAT_REUSE_MATRIX){
4495     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4496     ci = mat->i; cj = mat->j; cam = mat->a;
4497     for (i=0; i<am; i++) {
4498       /* off-diagonal portion of A */
4499       ncols_o = bi[i+1] - bi[i];
4500       for (jo=0; jo<ncols_o; jo++) {
4501         col = cmap[*bj];
4502         if (col >= cstart) break;
4503         *cam++ = *ba++; bj++;
4504       }
4505       /* diagonal portion of A */
4506       ncols_d = ai[i+1] - ai[i];
4507       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4508       /* off-diagonal portion of A */
4509       for (j=jo; j<ncols_o; j++) {
4510         *cam++ = *ba++; bj++;
4511       }
4512     }
4513   } else {
4514     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4515   }
4516 
4517   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4518   PetscFunctionReturn(0);
4519 }
4520 
4521 #undef __FUNCT__
4522 #define __FUNCT__ "MatGetLocalMatCondensed"
4523 /*@C
4524      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows of A and only its NON-ZERO columns
4525 
4526     Not Collective
4527 
4528    Input Parameters:
4529 +    A - the matrix
4530 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4531 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4532 
4533    Output Parameter:
4534 .    A_loc - the local sequential matrix generated
4535 
4536     Level: developer
4537 
4538 @*/
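/*
   A minimal usage sketch (assumes A is an assembled MPIAIJ matrix; names are placeholders):

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ... A_loc holds the local rows of A restricted to the columns that are nonzero on this process ...
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/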
4539 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4540 {
4541   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4542   PetscErrorCode    ierr;
4543   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4544   IS                isrowa,iscola;
4545   Mat               *aloc;
4546 
4547   PetscFunctionBegin;
4548   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4549   if (!row){
4550     start = A->rmap->rstart; end = A->rmap->rend;
4551     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4552   } else {
4553     isrowa = *row;
4554   }
4555   if (!col){
4556     start = A->cmap->rstart;
4557     cmap  = a->garray;
4558     nzA   = a->A->cmap->n;
4559     nzB   = a->B->cmap->n;
4560     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4561     ncols = 0;
4562     for (i=0; i<nzB; i++) {
4563       if (cmap[i] < start) idx[ncols++] = cmap[i];
4564       else break;
4565     }
4566     imark = i;
4567     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4568     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4569     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4570     ierr = PetscFree(idx);CHKERRQ(ierr);
4571   } else {
4572     iscola = *col;
4573   }
4574   if (scall != MAT_INITIAL_MATRIX){
4575     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4576     aloc[0] = *A_loc;
4577   }
4578   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4579   *A_loc = aloc[0];
4580   ierr = PetscFree(aloc);CHKERRQ(ierr);
4581   if (!row){
4582     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4583   }
4584   if (!col){
4585     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4586   }
4587   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4588   PetscFunctionReturn(0);
4589 }
4590 
4591 #undef __FUNCT__
4592 #define __FUNCT__ "MatGetBrowsOfAcols"
4593 /*@C
4594     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4595 
4596     Collective on Mat
4597 
4598    Input Parameters:
4599 +    A,B - the matrices in mpiaij format
4600 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4601 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4602 
4603    Output Parameters:
4604 +    rowb, colb - index sets of rows and columns of B to extract
4605 .    brstart - index of the first row of B_seq that is taken from B's local rows
4606 -    B_seq - the sequential matrix generated
4607 
4608     Level: developer
4609 
4610 @*/
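/*
   A minimal usage sketch (assumes A and B are assembled MPIAIJ matrices with compatible layouts;
   the variable names are placeholders):

     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... rowb, colb and B_seq can be passed back in with MAT_REUSE_MATRIX ...
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/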
4611 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4612 {
4613   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4614   PetscErrorCode    ierr;
4615   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4616   IS                isrowb,iscolb;
4617   Mat               *bseq;
4618 
4619   PetscFunctionBegin;
4620   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4621     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4622   }
4623   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4624 
4625   if (scall == MAT_INITIAL_MATRIX){
4626     start = A->cmap->rstart;
4627     cmap  = a->garray;
4628     nzA   = a->A->cmap->n;
4629     nzB   = a->B->cmap->n;
4630     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4631     ncols = 0;
4632     for (i=0; i<nzB; i++) {  /* row < local row index */
4633       if (cmap[i] < start) idx[ncols++] = cmap[i];
4634       else break;
4635     }
4636     imark = i;
4637     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4638     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4639     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4640     ierr = PetscFree(idx);CHKERRQ(ierr);
4641     *brstart = imark;
4642     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4643   } else {
4644     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4645     isrowb = *rowb; iscolb = *colb;
4646     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4647     bseq[0] = *B_seq;
4648   }
4649   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4650   *B_seq = bseq[0];
4651   ierr = PetscFree(bseq);CHKERRQ(ierr);
4652   if (!rowb){
4653     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4654   } else {
4655     *rowb = isrowb;
4656   }
4657   if (!colb){
4658     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4659   } else {
4660     *colb = iscolb;
4661   }
4662   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4663   PetscFunctionReturn(0);
4664 }
4665 
4666 #undef __FUNCT__
4667 #define __FUNCT__ "MatGetBrowsOfAoCols"
4668 /*@C
4669     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4670     of the OFF-DIAGONAL portion of local A
4671 
4672     Collective on Mat
4673 
4674    Input Parameters:
4675 +    A,B - the matrices in mpiaij format
4676 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4677 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4678 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4679 
4680    Output Parameter:
4681 .    B_oth - the sequential matrix generated
4682 
4683     Level: developer
4684 
4685 @*/
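/*
   A minimal usage sketch (assumes A and B are assembled MPIAIJ matrices with compatible layouts;
   startsj and bufa are placeholder names whose buffers allow a later call to reuse the
   communication pattern):

     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa    = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ... later, refresh only the numerical values ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree(startsj);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/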
4686 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4687 {
4688   VecScatter_MPI_General *gen_to,*gen_from;
4689   PetscErrorCode         ierr;
4690   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4691   Mat_SeqAIJ             *b_oth;
4692   VecScatter             ctx=a->Mvctx;
4693   MPI_Comm               comm=((PetscObject)ctx)->comm;
4694   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4695   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4696   PetscScalar            *rvalues,*svalues;
4697   MatScalar              *b_otha,*bufa,*bufA;
4698   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4699   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4700   MPI_Status             *sstatus,rstatus;
4701   PetscMPIInt            jj;
4702   PetscInt               *cols,sbs,rbs;
4703   PetscScalar            *vals;
4704 
4705   PetscFunctionBegin;
4706   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4707     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4708   }
4709   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4710   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4711 
4712   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4713   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4714   rvalues  = gen_from->values; /* holds the length of receiving row */
4715   svalues  = gen_to->values;   /* holds the length of sending row */
4716   nrecvs   = gen_from->n;
4717   nsends   = gen_to->n;
4718 
4719   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4720   srow     = gen_to->indices;   /* local row index to be sent */
4721   sstarts  = gen_to->starts;
4722   sprocs   = gen_to->procs;
4723   sstatus  = gen_to->sstatus;
4724   sbs      = gen_to->bs;
4725   rstarts  = gen_from->starts;
4726   rprocs   = gen_from->procs;
4727   rbs      = gen_from->bs;
4728 
4729   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4730   if (scall == MAT_INITIAL_MATRIX){
4731     /* i-array */
4732     /*---------*/
4733     /*  post receives */
4734     for (i=0; i<nrecvs; i++){
4735       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4736       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4737       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4738     }
4739 
4740     /* pack the outgoing message */
4741     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4742     rstartsj = sstartsj + nsends +1;
4743     sstartsj[0] = 0;  rstartsj[0] = 0;
4744     len = 0; /* total length of j or a array to be sent */
4745     k = 0;
4746     for (i=0; i<nsends; i++){
4747       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4748       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4749       for (j=0; j<nrows; j++) {
4750         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4751         for (l=0; l<sbs; l++){
4752           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4753           rowlen[j*sbs+l] = ncols;
4754           len += ncols;
4755           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4756         }
4757         k++;
4758       }
4759       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4760       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4761     }
4762     /* recvs and sends of i-array are completed */
4763     i = nrecvs;
4764     while (i--) {
4765       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4766     }
4767     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4768 
4769     /* allocate buffers for sending j and a arrays */
4770     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4771     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4772 
4773     /* create i-array of B_oth */
4774     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4775     b_othi[0] = 0;
4776     len = 0; /* total length of j or a array to be received */
4777     k = 0;
4778     for (i=0; i<nrecvs; i++){
4779       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4780       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4781       for (j=0; j<nrows; j++) {
4782         b_othi[k+1] = b_othi[k] + rowlen[j];
4783         len += rowlen[j]; k++;
4784       }
4785       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4786     }
4787 
4788     /* allocate space for j and a arrays of B_oth */
4789     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4790     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4791 
4792     /* j-array */
4793     /*---------*/
4794     /*  post receives of j-array */
4795     for (i=0; i<nrecvs; i++){
4796       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4797       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4798     }
4799 
4800     /* pack the outgoing message j-array */
4801     k = 0;
4802     for (i=0; i<nsends; i++){
4803       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4804       bufJ = bufj+sstartsj[i];
4805       for (j=0; j<nrows; j++) {
4806         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4807         for (ll=0; ll<sbs; ll++){
4808           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4809           for (l=0; l<ncols; l++){
4810             *bufJ++ = cols[l];
4811           }
4812           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4813         }
4814       }
4815       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4816     }
4817 
4818     /* recvs and sends of j-array are completed */
4819     i = nrecvs;
4820     while (i--) {
4821       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4822     }
4823     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4824   } else if (scall == MAT_REUSE_MATRIX){
4825     sstartsj = *startsj;
4826     rstartsj = sstartsj + nsends +1;
4827     bufa     = *bufa_ptr;
4828     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4829     b_otha   = b_oth->a;
4830   } else {
4831     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4832   }
4833 
4834   /* a-array */
4835   /*---------*/
4836   /*  post receives of a-array */
4837   for (i=0; i<nrecvs; i++){
4838     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4839     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4840   }
4841 
4842   /* pack the outgoing message a-array */
4843   k = 0;
4844   for (i=0; i<nsends; i++){
4845     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4846     bufA = bufa+sstartsj[i];
4847     for (j=0; j<nrows; j++) {
4848       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4849       for (ll=0; ll<sbs; ll++){
4850         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4851         for (l=0; l<ncols; l++){
4852           *bufA++ = vals[l];
4853         }
4854         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4855       }
4856     }
4857     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4858   }
4859   /* recvs and sends of a-array are completed */
4860   i = nrecvs;
4861   while (i--) {
4862     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4863   }
4864   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4865   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4866 
4867   if (scall == MAT_INITIAL_MATRIX){
4868     /* put together the new matrix */
4869     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4870 
4871     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4872     /* Since these are PETSc arrays, change flags to free them as necessary. */
4873     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4874     b_oth->free_a  = PETSC_TRUE;
4875     b_oth->free_ij = PETSC_TRUE;
4876     b_oth->nonew   = 0;
4877 
4878     ierr = PetscFree(bufj);CHKERRQ(ierr);
4879     if (!startsj || !bufa_ptr){
4880       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4881       ierr = PetscFree(bufa);CHKERRQ(ierr);
4882     } else {
4883       *startsj  = sstartsj;
4884       *bufa_ptr = bufa;
4885     }
4886   }
4887   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4888   PetscFunctionReturn(0);
4889 }
4890 
4891 #undef __FUNCT__
4892 #define __FUNCT__ "MatGetCommunicationStructs"
4893 /*@C
4894   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4895 
4896   Not Collective
4897 
4898   Input Parameter:
4899 . A - The matrix in mpiaij format
4900 
4901   Output Parameters:
4902 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4903 . colmap - A map from global column index to local index into lvec
4904 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4905 
4906   Level: developer
4907 
4908 @*/
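/*
   A minimal usage sketch (assumes A is an assembled MPIAIJ matrix and PETSC_USE_CTABLE is not
   defined, so colmap is a plain integer array; variable names are placeholders):

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ... the returned objects are owned by A and must not be destroyed by the caller ...
*/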
4909 #if defined (PETSC_USE_CTABLE)
4910 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4911 #else
4912 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4913 #endif
4914 {
4915   Mat_MPIAIJ *a;
4916 
4917   PetscFunctionBegin;
4918   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4919   PetscValidPointer(lvec, 2)
4920   PetscValidPointer(colmap, 3)
4921   PetscValidPointer(multScatter, 4)
4922   a = (Mat_MPIAIJ *) A->data;
4923   if (lvec) *lvec = a->lvec;
4924   if (colmap) *colmap = a->colmap;
4925   if (multScatter) *multScatter = a->Mvctx;
4926   PetscFunctionReturn(0);
4927 }
4928 
4929 EXTERN_C_BEGIN
4930 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4931 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4932 EXTERN_C_END
4933 
4934 #include "../src/mat/impls/dense/mpi/mpidense.h"
4935 
4936 #undef __FUNCT__
4937 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4938 /*
4939     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
4940 
4941                n                       p                          p
4942         (              )       (              )         (                  )
4943       m (      A       )  *  n (       B      )   =   m (         C        )
4944         (              )       (              )         (                  )
4945 
4946 */
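/*
   Sketch of the identity being used (standard linear algebra, not additional code in this file):
   with At = A^T and Bt = B^T, MatMatMult(Bt,At) forms Bt*At = (A*B)^T, and the final
   MatTranspose() recovers C = A*B without ever multiplying by the parallel dense A from the left.
*/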
4947 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4948 {
4949   PetscErrorCode     ierr;
4950   Mat                At,Bt,Ct;
4951 
4952   PetscFunctionBegin;
4953   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4954   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4955   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4956   ierr = MatDestroy(At);CHKERRQ(ierr);
4957   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4958   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4959   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4960   PetscFunctionReturn(0);
4961 }
4962 
4963 #undef __FUNCT__
4964 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4965 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4966 {
4967   PetscErrorCode ierr;
4968   PetscInt       m=A->rmap->n,n=B->cmap->n;
4969   Mat            Cmat;
4970 
4971   PetscFunctionBegin;
4972   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4973   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4974   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4975   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4976   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4977   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4978   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4979   *C   = Cmat;
4980   PetscFunctionReturn(0);
4981 }
4982 
4983 /* ----------------------------------------------------------------*/
4984 #undef __FUNCT__
4985 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4986 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4987 {
4988   PetscErrorCode ierr;
4989 
4990   PetscFunctionBegin;
4991   if (scall == MAT_INITIAL_MATRIX){
4992     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4993   }
4994   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4995   PetscFunctionReturn(0);
4996 }
4997 
4998 EXTERN_C_BEGIN
4999 #if defined(PETSC_HAVE_MUMPS)
5000 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
5001 #endif
5002 #if defined(PETSC_HAVE_PASTIX)
5003 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5004 #endif
5005 #if defined(PETSC_HAVE_SUPERLU_DIST)
5006 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5007 #endif
5008 #if defined(PETSC_HAVE_SPOOLES)
5009 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5010 #endif
5011 EXTERN_C_END
5012 
5013 /*MC
5014    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5015 
5016    Options Database Keys:
5017 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5018 
5019   Level: beginner
5020 
5021 .seealso: MatCreateMPIAIJ()
5022 M*/
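/*
   A minimal sketch of selecting this type at run time (standard PETSc usage; M, N and the
   variable names are placeholders). The same choice can be made with the -mat_type mpiaij
   options database key:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/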
5023 
5024 EXTERN_C_BEGIN
5025 #undef __FUNCT__
5026 #define __FUNCT__ "MatCreate_MPIAIJ"
5027 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5028 {
5029   Mat_MPIAIJ     *b;
5030   PetscErrorCode ierr;
5031   PetscMPIInt    size;
5032 
5033   PetscFunctionBegin;
5034   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5035 
5036   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5037   B->data         = (void*)b;
5038   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5039   B->rmap->bs      = 1;
5040   B->assembled    = PETSC_FALSE;
5041   B->mapping      = 0;
5042 
5043   B->insertmode      = NOT_SET_VALUES;
5044   b->size            = size;
5045   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5046 
5047   /* build cache for off array entries formed */
5048   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5049   b->donotstash  = PETSC_FALSE;
5050   b->colmap      = 0;
5051   b->garray      = 0;
5052   b->roworiented = PETSC_TRUE;
5053 
5054   /* stuff used for matrix vector multiply */
5055   b->lvec      = PETSC_NULL;
5056   b->Mvctx     = PETSC_NULL;
5057 
5058   /* stuff for MatGetRow() */
5059   b->rowindices   = 0;
5060   b->rowvalues    = 0;
5061   b->getrowactive = PETSC_FALSE;
5062 
5063 #if defined(PETSC_HAVE_SPOOLES)
5064   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
5065                                      "MatGetFactor_mpiaij_spooles",
5066                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5067 #endif
5068 #if defined(PETSC_HAVE_MUMPS)
5069   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
5070                                      "MatGetFactor_mpiaij_mumps",
5071                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5072 #endif
5073 #if defined(PETSC_HAVE_PASTIX)
5074   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
5075 					   "MatGetFactor_mpiaij_pastix",
5076 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5077 #endif
5078 #if defined(PETSC_HAVE_SUPERLU_DIST)
5079   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
5080                                      "MatGetFactor_mpiaij_superlu_dist",
5081                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5082 #endif
5083   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5084                                      "MatStoreValues_MPIAIJ",
5085                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5086   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5087                                      "MatRetrieveValues_MPIAIJ",
5088                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5089   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5090 				     "MatGetDiagonalBlock_MPIAIJ",
5091                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5092   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5093 				     "MatIsTranspose_MPIAIJ",
5094 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5095   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5096 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5097 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5098   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5099 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5100 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5101   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5102 				     "MatDiagonalScaleLocal_MPIAIJ",
5103 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5104   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5105                                      "MatConvert_MPIAIJ_MPICSRPERM",
5106                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5107   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5108                                      "MatConvert_MPIAIJ_MPICRL",
5109                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5110   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5111                                      "MatMatMult_MPIDense_MPIAIJ",
5112                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5113   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5114                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5115                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5116   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5117                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5118                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5119   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5120   PetscFunctionReturn(0);
5121 }
5122 EXTERN_C_END
5123 
5124 #undef __FUNCT__
5125 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5126 /*@
5127      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5128          and "off-diagonal" part of the matrix in CSR format.
5129 
5130    Collective on MPI_Comm
5131 
5132    Input Parameters:
5133 +  comm - MPI communicator
5134 .  m - number of local rows (Cannot be PETSC_DECIDE)
5135 .  n - This value should be the same as the local size used in creating the
5136        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5137        it calculated if N is given). For square matrices n is almost always m.
5138 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5139 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5140 .   i - row indices for "diagonal" portion of matrix
5141 .   j - column indices
5142 .   a - matrix values
5143 .   oi - row indices for "off-diagonal" portion of matrix
5144 .   oj - column indices
5145 -   oa - matrix values
5146 
5147    Output Parameter:
5148 .   mat - the matrix
5149 
5150    Level: advanced
5151 
5152    Notes:
5153        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5154 
5155        The i and j indices are 0 based
5156 
5157        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5158 
5159 
5160 .keywords: matrix, aij, compressed row, sparse, parallel
5161 
5162 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5163           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5164 @*/
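/*
   A minimal calling sketch (the arrays d_i/d_j/d_a and o_i/o_j/o_a are placeholders that the
   caller must have filled with the "diagonal" and "off-diagonal" blocks in CSR form, and must
   keep alive for the lifetime of the matrix since they are not copied):

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           d_i,d_j,d_a,o_i,o_j,o_a,&A);CHKERRQ(ierr);
*/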
5165 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5166 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5167 {
5168   PetscErrorCode ierr;
5169   Mat_MPIAIJ     *maij;
5170 
5171   PetscFunctionBegin;
5172   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5173   if (i[0]) {
5174     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5175   }
5176   if (oi[0]) {
5177     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5178   }
5179   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5180   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5181   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5182   maij = (Mat_MPIAIJ*) (*mat)->data;
5183   maij->donotstash     = PETSC_TRUE;
5184   (*mat)->preallocated = PETSC_TRUE;
5185 
5186   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5187   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5188   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5189   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5190 
5191   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5192   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5193 
5194   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5195   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5196   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5197   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5198 
5199   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5200   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5201   PetscFunctionReturn(0);
5202 }
5203 
5204 /*
5205     Special version for direct calls from Fortran
5206 */
5207 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5208 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5209 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5210 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5211 #endif
5212 
5213 /* Change these macros so can be used in void function */
5214 #undef CHKERRQ
5215 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5216 #undef SETERRQ2
5217 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5218 #undef SETERRQ
5219 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5220 
5221 EXTERN_C_BEGIN
5222 #undef __FUNCT__
5223 #define __FUNCT__ "matsetvaluesmpiaij_"
5224 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5225 {
5226   Mat             mat = *mmat;
5227   PetscInt        m = *mm, n = *mn;
5228   InsertMode      addv = *maddv;
5229   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5230   PetscScalar     value;
5231   PetscErrorCode  ierr;
5232 
5233   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5234   if (mat->insertmode == NOT_SET_VALUES) {
5235     mat->insertmode = addv;
5236   }
5237 #if defined(PETSC_USE_DEBUG)
5238   else if (mat->insertmode != addv) {
5239     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5240   }
5241 #endif
5242   {
5243   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5244   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5245   PetscTruth      roworiented = aij->roworiented;
5246 
5247   /* Some Variables required in the macro */
5248   Mat             A = aij->A;
5249   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5250   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5251   MatScalar       *aa = a->a;
5252   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5253   Mat             B = aij->B;
5254   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5255   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5256   MatScalar       *ba = b->a;
5257 
5258   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5259   PetscInt        nonew = a->nonew;
5260   MatScalar       *ap1,*ap2;
5261 
5262   PetscFunctionBegin;
5263   for (i=0; i<m; i++) {
5264     if (im[i] < 0) continue;
5265 #if defined(PETSC_USE_DEBUG)
5266     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5267 #endif
5268     if (im[i] >= rstart && im[i] < rend) {
5269       row      = im[i] - rstart;
5270       lastcol1 = -1;
5271       rp1      = aj + ai[row];
5272       ap1      = aa + ai[row];
5273       rmax1    = aimax[row];
5274       nrow1    = ailen[row];
5275       low1     = 0;
5276       high1    = nrow1;
5277       lastcol2 = -1;
5278       rp2      = bj + bi[row];
5279       ap2      = ba + bi[row];
5280       rmax2    = bimax[row];
5281       nrow2    = bilen[row];
5282       low2     = 0;
5283       high2    = nrow2;
5284 
5285       for (j=0; j<n; j++) {
5286         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5287         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5288         if (in[j] >= cstart && in[j] < cend){
5289           col = in[j] - cstart;
5290           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5291         } else if (in[j] < 0) continue;
5292 #if defined(PETSC_USE_DEBUG)
5293         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5294 #endif
5295         else {
5296           if (mat->was_assembled) {
5297             if (!aij->colmap) {
5298               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5299             }
5300 #if defined (PETSC_USE_CTABLE)
5301             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5302 	    col--;
5303 #else
5304             col = aij->colmap[in[j]] - 1;
5305 #endif
5306             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5307               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5308               col =  in[j];
5309               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5310               B = aij->B;
5311               b = (Mat_SeqAIJ*)B->data;
5312               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5313               rp2      = bj + bi[row];
5314               ap2      = ba + bi[row];
5315               rmax2    = bimax[row];
5316               nrow2    = bilen[row];
5317               low2     = 0;
5318               high2    = nrow2;
5319               bm       = aij->B->rmap->n;
5320               ba = b->a;
5321             }
5322           } else col = in[j];
5323           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5324         }
5325       }
5326     } else {
5327       if (!aij->donotstash) {
5328         if (roworiented) {
5329           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5330         } else {
5331           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5332         }
5333       }
5334     }
5335   }}
5336   PetscFunctionReturnVoid();
5337 }
5338 EXTERN_C_END
5339 
5340