xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0daa03b54710b531a08e13569faabf6ae762fa03)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatDistribute_MPIAIJ"
7 /*
8     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
9     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
10 
11     Only for square matrices
12 */
13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
14 {
15   PetscMPIInt    rank,size;
16   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
17   PetscErrorCode ierr;
18   Mat            mat;
19   Mat_SeqAIJ     *gmata;
20   PetscMPIInt    tag;
21   MPI_Status     status;
22   PetscTruth     aij;
23   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
24 
25   PetscFunctionBegin;
26   CHKMEMQ;
27   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
28   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
29   if (!rank) {
30     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
31     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
32   }
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
35     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
36     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
37     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
38     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
39     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
40     rowners[0] = 0;
41     for (i=2; i<=size; i++) {
42       rowners[i] += rowners[i-1];
43     }
44     rstart = rowners[rank];
45     rend   = rowners[rank+1];
46     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
47     if (!rank) {
48       gmata = (Mat_SeqAIJ*) gmat->data;
49       /* send row lengths to all processors */
50       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
51       for (i=1; i<size; i++) {
52 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
53       }
54       /* determine number diagonal and off-diagonal counts */
55       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
56       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
57       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
58       jj = 0;
59       for (i=0; i<m; i++) {
60 	for (j=0; j<dlens[i]; j++) {
61           if (gmata->j[jj] < rstart) ld[i]++;
62 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
63 	  jj++;
64 	}
65       }
66       /* send column indices to other processes */
67       for (i=1; i<size; i++) {
68 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
69 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
70 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71       }
72 
73       /* send numerical values to other processes */
74       for (i=1; i<size; i++) {
75         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
76         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
77       }
78       gmataa = gmata->a;
79       gmataj = gmata->j;
80 
81     } else {
82       /* receive row lengths */
83       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
84       /* receive column indices */
85       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
86       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
87       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
88       /* determine number diagonal and off-diagonal counts */
89       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
90       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
91       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
92       jj = 0;
93       for (i=0; i<m; i++) {
94 	for (j=0; j<dlens[i]; j++) {
95           if (gmataj[jj] < rstart) ld[i]++;
96 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
97 	  jj++;
98 	}
99       }
100       /* receive numerical values */
101       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
103     }
104     /* set preallocation */
105     for (i=0; i<m; i++) {
106       dlens[i] -= olens[i];
107     }
108     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
109     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
110 
111     for (i=0; i<m; i++) {
112       dlens[i] += olens[i];
113     }
114     cnt  = 0;
115     for (i=0; i<m; i++) {
116       row  = rstart + i;
117       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
118       cnt += dlens[i];
119     }
120     if (rank) {
121       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
122     }
123     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
124     ierr = PetscFree(rowners);CHKERRQ(ierr);
125     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
126     *inmat = mat;
127   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
128     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
129     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
130     mat   = *inmat;
131     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
132     if (!rank) {
133       /* send numerical values to other processes */
134       gmata = (Mat_SeqAIJ*) gmat->data;
135       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
136       gmataa = gmata->a;
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
142     } else {
143       /* receive numerical values from process 0*/
144       nz   = Ad->nz + Ao->nz;
145       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
146       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
147     }
148     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
149     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
150     ad = Ad->a;
151     ao = Ao->a;
152     if (mat->rmap->n) {
153       i  = 0;
154       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
155       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
156     }
157     for (i=1; i<mat->rmap->n; i++) {
158       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
159       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
160     }
161     i--;
162     if (mat->rmap->n) {
163       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
164     }
165     if (rank) {
166       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
167     }
168   }
169   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   CHKMEMQ;
172   PetscFunctionReturn(0);
173 }
174 
175 /*
176   Local utility routine that creates a mapping from the global column
177 number to the local number in the off-diagonal part of the local
178 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
179 a slightly higher hash table cost; without it it is not scalable (each processor
180 has an order N integer array but is fast to acess.
181 */
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
204 
205 
206 #define CHUNKSIZE   15
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
242 { \
243     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
244     lastcol2 = col;\
245     while (high2-low2 > 5) { \
246       t = (low2+high2)/2; \
247       if (rp2[t] > col) high2 = t; \
248       else             low2  = t; \
249     } \
250     for (_i=low2; _i<high2; _i++) {		\
251       if (rp2[_i] > col) break;			\
252       if (rp2[_i] == col) {			      \
253 	if (addv == ADD_VALUES) ap2[_i] += value;     \
254 	else                    ap2[_i] = value;      \
255 	goto b_noinsert;			      \
256       }						      \
257     }							      \
258     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
259     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
260     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
261     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
262     N = nrow2++ - 1; b->nz++; high2++;					\
263     /* shift up all the later entries in this row */			\
264     for (ii=N; ii>=_i; ii--) {						\
265       rp2[ii+1] = rp2[ii];						\
266       ap2[ii+1] = ap2[ii];						\
267     }									\
268     rp2[_i] = col;							\
269     ap2[_i] = value;							\
270     b_noinsert: ;								\
271     bilen[row] = nrow2;							\
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
285 
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIAIJ"
304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscScalar    value;
308   PetscErrorCode ierr;
309   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
310   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
311   PetscTruth     roworiented = aij->roworiented;
312 
313   /* Some Variables required in the macro */
314   Mat            A = aij->A;
315   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
316   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
317   MatScalar      *aa = a->a;
318   PetscTruth     ignorezeroentries = a->ignorezeroentries;
319   Mat            B = aij->B;
320   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
321   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
322   MatScalar      *ba = b->a;
323 
324   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
325   PetscInt       nonew = a->nonew;
326   MatScalar      *ap1,*ap2;
327 
328   PetscFunctionBegin;
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (!aij->donotstash) {
394         if (roworiented) {
395           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
396         } else {
397           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         }
399       }
400     }
401   }
402   PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatGetValues_MPIAIJ"
407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
408 {
409   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
410   PetscErrorCode ierr;
411   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
412   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
413 
414   PetscFunctionBegin;
415   for (i=0; i<m; i++) {
416     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
417     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
418     if (idxm[i] >= rstart && idxm[i] < rend) {
419       row = idxm[i] - rstart;
420       for (j=0; j<n; j++) {
421         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
422         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
423         if (idxn[j] >= cstart && idxn[j] < cend){
424           col = idxn[j] - cstart;
425           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
426         } else {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
429           }
430 #if defined (PETSC_USE_CTABLE)
431           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
432           col --;
433 #else
434           col = aij->colmap[idxn[j]] - 1;
435 #endif
436           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
437           else {
438             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
439           }
440         }
441       }
442     } else {
443       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
444     }
445   }
446   PetscFunctionReturn(0);
447 }
448 
449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscErrorCode ierr;
457   PetscInt       nstash,reallocs;
458   InsertMode     addv;
459 
460   PetscFunctionBegin;
461   if (aij->donotstash) {
462     PetscFunctionReturn(0);
463   }
464 
465   /* make sure all processors are either in INSERTMODE or ADDMODE */
466   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
467   if (addv == (ADD_VALUES|INSERT_VALUES)) {
468     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   }
470   mat->insertmode = addv; /* in case this processor had no cache */
471 
472   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
473   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
474   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
484   PetscErrorCode ierr;
485   PetscMPIInt    n;
486   PetscInt       i,j,rstart,ncols,flg;
487   PetscInt       *row,*col;
488   PetscTruth     other_disassembled;
489   PetscScalar    *val;
490   InsertMode     addv = mat->insertmode;
491 
492   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
493   PetscFunctionBegin;
494   if (!aij->donotstash) {
495     while (1) {
496       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
497       if (!flg) break;
498 
499       for (i=0; i<n;) {
500         /* Now identify the consecutive vals belonging to the same row */
501         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
502         if (j < n) ncols = j-i;
503         else       ncols = n-i;
504         /* Now assemble all these values with a single function call */
505         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
506         i = j;
507       }
508     }
509     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
510   }
511   a->compressedrow.use     = PETSC_FALSE;
512   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
513   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
514 
515   /* determine if any processor has disassembled, if so we must
516      also disassemble ourselfs, in order that we may reassemble. */
517   /*
518      if nonzero structure of submatrix B cannot change then we know that
519      no processor disassembled thus we can skip this stuff
520   */
521   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
522     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
523     if (mat->was_assembled && !other_disassembled) {
524       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
525     }
526   }
527   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
528     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
529   }
530   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
531   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
532   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
533   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
534 
535   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
536   aij->rowvalues = 0;
537 
538   /* used by MatAXPY() */
539   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
540   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
541 
542   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
543   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
550 {
551   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
556   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatZeroRows_MPIAIJ"
562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
563 {
564   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
567   PetscInt       i,*owners = A->rmap->range;
568   PetscInt       *nprocs,j,idx,nsends,row;
569   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
570   PetscInt       *rvalues,count,base,slen,*source;
571   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
572   MPI_Comm       comm = ((PetscObject)A)->comm;
573   MPI_Request    *send_waits,*recv_waits;
574   MPI_Status     recv_status,*send_status;
575 #if defined(PETSC_DEBUG)
576   PetscTruth     found = PETSC_FALSE;
577 #endif
578 
579   PetscFunctionBegin;
580   /*  first count number of contributors to each processor */
581   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
582   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
583   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
584   j = 0;
585   for (i=0; i<N; i++) {
586     if (lastidx > (idx = rows[i])) j = 0;
587     lastidx = idx;
588     for (; j<size; j++) {
589       if (idx >= owners[j] && idx < owners[j+1]) {
590         nprocs[2*j]++;
591         nprocs[2*j+1] = 1;
592         owner[i] = j;
593 #if defined(PETSC_DEBUG)
594         found = PETSC_TRUE;
595 #endif
596         break;
597       }
598     }
599 #if defined(PETSC_DEBUG)
600     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
601     found = PETSC_FALSE;
602 #endif
603   }
604   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
605 
606   /* inform other processors of number of messages and max length*/
607   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
608 
609   /* post receives:   */
610   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
611   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
612   for (i=0; i<nrecvs; i++) {
613     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
614   }
615 
616   /* do sends:
617       1) starts[i] gives the starting index in svalues for stuff going to
618          the ith processor
619   */
620   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
621   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
622   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
623   starts[0] = 0;
624   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
625   for (i=0; i<N; i++) {
626     svalues[starts[owner[i]]++] = rows[i];
627   }
628 
629   starts[0] = 0;
630   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
631   count = 0;
632   for (i=0; i<size; i++) {
633     if (nprocs[2*i+1]) {
634       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
635     }
636   }
637   ierr = PetscFree(starts);CHKERRQ(ierr);
638 
639   base = owners[rank];
640 
641   /*  wait on receives */
642   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
643   source = lens + nrecvs;
644   count  = nrecvs; slen = 0;
645   while (count) {
646     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
647     /* unpack receives into our local space */
648     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
649     source[imdex]  = recv_status.MPI_SOURCE;
650     lens[imdex]    = n;
651     slen          += n;
652     count--;
653   }
654   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
655 
656   /* move the data into the send scatter */
657   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
658   count = 0;
659   for (i=0; i<nrecvs; i++) {
660     values = rvalues + i*nmax;
661     for (j=0; j<lens[i]; j++) {
662       lrows[count++] = values[j] - base;
663     }
664   }
665   ierr = PetscFree(rvalues);CHKERRQ(ierr);
666   ierr = PetscFree(lens);CHKERRQ(ierr);
667   ierr = PetscFree(owner);CHKERRQ(ierr);
668   ierr = PetscFree(nprocs);CHKERRQ(ierr);
669 
670   /* actually zap the local rows */
671   /*
672         Zero the required rows. If the "diagonal block" of the matrix
673      is square and the user wishes to set the diagonal we use separate
674      code so that MatSetValues() is not called for each diagonal allocating
675      new memory, thus calling lots of mallocs and slowing things down.
676 
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMultAdd_MPIAIJ"
745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
746 {
747   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
752   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
753   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
761 {
762   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
763   PetscErrorCode ierr;
764   PetscTruth     merged;
765 
766   PetscFunctionBegin;
767   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
768   /* do nondiagonal part */
769   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
770   if (!merged) {
771     /* send it on its way */
772     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773     /* do local part */
774     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
775     /* receive remote parts: note this assumes the values are not actually */
776     /* added in yy until the next line, */
777     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778   } else {
779     /* do local part */
780     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
781     /* send it on its way */
782     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
783     /* values actually were received in the Begin() but we need to call this nop */
784     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 EXTERN_C_BEGIN
790 #undef __FUNCT__
791 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
793 {
794   MPI_Comm       comm;
795   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
796   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
797   IS             Me,Notme;
798   PetscErrorCode ierr;
799   PetscInt       M,N,first,last,*notme,i;
800   PetscMPIInt    size;
801 
802   PetscFunctionBegin;
803 
804   /* Easy test: symmetric diagonal block */
805   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
806   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
807   if (!*f) PetscFunctionReturn(0);
808   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
809   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
810   if (size == 1) PetscFunctionReturn(0);
811 
812   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
813   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
814   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
815   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
816   for (i=0; i<first; i++) notme[i] = i;
817   for (i=last; i<M; i++) notme[i-last+first] = i;
818   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
819   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
820   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
821   Aoff = Aoffs[0];
822   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
823   Boff = Boffs[0];
824   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
825   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
827   ierr = ISDestroy(Me);CHKERRQ(ierr);
828   ierr = ISDestroy(Notme);CHKERRQ(ierr);
829 
830   PetscFunctionReturn(0);
831 }
832 EXTERN_C_END
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
837 {
838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* do nondiagonal part */
843   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
844   /* send it on its way */
845   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
846   /* do local part */
847   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
848   /* receive remote parts */
849   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 /*
854   This only works correctly for square matrices where the subblock A->A is the
855    diagonal block
856 */
857 #undef __FUNCT__
858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
860 {
861   PetscErrorCode ierr;
862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
863 
864   PetscFunctionBegin;
865   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
866   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
868   }
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
900   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatView_MPIAIJ_Binary"
927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
928 {
929   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
930   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
931   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
932   PetscErrorCode    ierr;
933   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
934   int               fd;
935   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
936   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
937   PetscScalar       *column_values;
938 
939   PetscFunctionBegin;
940   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
941   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
942   nz   = A->nz + B->nz;
943   if (!rank) {
944     header[0] = MAT_FILE_COOKIE;
945     header[1] = mat->rmap->N;
946     header[2] = mat->cmap->N;
947     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
948     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
949     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
950     /* get largest number of rows any processor has */
951     rlen = mat->rmap->n;
952     range = mat->rmap->range;
953     for (i=1; i<size; i++) {
954       rlen = PetscMax(rlen,range[i+1] - range[i]);
955     }
956   } else {
957     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
958     rlen = mat->rmap->n;
959   }
960 
961   /* load up the local row counts */
962   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
963   for (i=0; i<mat->rmap->n; i++) {
964     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
965   }
966 
967   /* store the row lengths to the file */
968   if (!rank) {
969     MPI_Status status;
970     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
971     for (i=1; i<size; i++) {
972       rlen = range[i+1] - range[i];
973       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
974       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
975     }
976   } else {
977     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
980 
981   /* load up the local column indices */
982   nzmax = nz; /* )th processor needs space a largest processor needs */
983   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
984   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
985   cnt  = 0;
986   for (i=0; i<mat->rmap->n; i++) {
987     for (j=B->i[i]; j<B->i[i+1]; j++) {
988       if ( (col = garray[B->j[j]]) > cstart) break;
989       column_indices[cnt++] = col;
990     }
991     for (k=A->i[i]; k<A->i[i+1]; k++) {
992       column_indices[cnt++] = A->j[k] + cstart;
993     }
994     for (; j<B->i[i+1]; j++) {
995       column_indices[cnt++] = garray[B->j[j]];
996     }
997   }
998   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
999 
1000   /* store the column indices to the file */
1001   if (!rank) {
1002     MPI_Status status;
1003     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1004     for (i=1; i<size; i++) {
1005       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1006       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1007       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1008       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1009     }
1010   } else {
1011     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the local column values */
1017   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<mat->rmap->n; i++) {
1020     for (j=B->i[i]; j<B->i[i+1]; j++) {
1021       if ( garray[B->j[j]] > cstart) break;
1022       column_values[cnt++] = B->a[j];
1023     }
1024     for (k=A->i[i]; k<A->i[i+1]; k++) {
1025       column_values[cnt++] = A->a[k];
1026     }
1027     for (; j<B->i[i+1]; j++) {
1028       column_values[cnt++] = B->a[j];
1029     }
1030   }
1031   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1032 
1033   /* store the column values to the file */
1034   if (!rank) {
1035     MPI_Status status;
1036     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1037     for (i=1; i<size; i++) {
1038       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1039       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1040       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1041       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1042     }
1043   } else {
1044     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046   }
1047   ierr = PetscFree(column_values);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1054 {
1055   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1056   PetscErrorCode    ierr;
1057   PetscMPIInt       rank = aij->rank,size = aij->size;
1058   PetscTruth        isdraw,iascii,isbinary;
1059   PetscViewer       sviewer;
1060   PetscViewerFormat format;
1061 
1062   PetscFunctionBegin;
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1066   if (iascii) {
1067     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1068     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1069       MatInfo    info;
1070       PetscTruth inodes;
1071 
1072       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1073       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1074       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1075       if (!inodes) {
1076         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1077 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1078       } else {
1079         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1080 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1081       }
1082       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1083       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1084       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1085       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1086       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1087       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1088       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1089       PetscFunctionReturn(0);
1090     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1091       PetscInt   inodecount,inodelimit,*inodes;
1092       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1093       if (inodes) {
1094         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1095       } else {
1096         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1097       }
1098       PetscFunctionReturn(0);
1099     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1100       PetscFunctionReturn(0);
1101     }
1102   } else if (isbinary) {
1103     if (size == 1) {
1104       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106     } else {
1107       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1108     }
1109     PetscFunctionReturn(0);
1110   } else if (isdraw) {
1111     PetscDraw  draw;
1112     PetscTruth isnull;
1113     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1114     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1115   }
1116 
1117   if (size == 1) {
1118     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1119     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1120   } else {
1121     /* assemble the entire matrix onto first processor. */
1122     Mat         A;
1123     Mat_SeqAIJ  *Aloc;
1124     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1125     MatScalar   *a;
1126 
1127     if (mat->rmap->N > 1024) {
1128       PetscTruth flg = PETSC_FALSE;
1129 
1130       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1131       if (!flg) {
1132         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1133       }
1134     }
1135 
1136     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1137     if (!rank) {
1138       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1139     } else {
1140       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1141     }
1142     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1143     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1144     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1145     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1146 
1147     /* copy over the A part */
1148     Aloc = (Mat_SeqAIJ*)aij->A->data;
1149     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1150     row = mat->rmap->rstart;
1151     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1155     }
1156     aj = Aloc->j;
1157     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1158 
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqAIJ*)aij->B->data;
1161     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     row  = mat->rmap->rstart;
1163     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1164     ct   = cols;
1165     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1166     for (i=0; i<m; i++) {
1167       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1168       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1169     }
1170     ierr = PetscFree(ct);CHKERRQ(ierr);
1171     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     /*
1174        Everyone has to call to draw the matrix since the graphics waits are
1175        synchronized across all processors that share the PetscDraw object
1176     */
1177     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1178     if (!rank) {
1179       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1180       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1181     }
1182     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1183     ierr = MatDestroy(A);CHKERRQ(ierr);
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIAIJ"
1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1191 {
1192   PetscErrorCode ierr;
1193   PetscTruth     iascii,isdraw,issocket,isbinary;
1194 
1195   PetscFunctionBegin;
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1200   if (iascii || isdraw || isbinary || issocket) {
1201     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1202   } else {
1203     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1204   }
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatSOR_MPIAIJ"
1210 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1211 {
1212   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1213   PetscErrorCode ierr;
1214   Vec            bb1 = 0;
1215   PetscTruth     hasop;
1216 
1217   PetscFunctionBegin;
1218   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1219     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1220   }
1221 
1222   if (flag == SOR_APPLY_UPPER) {
1223     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1224     PetscFunctionReturn(0);
1225   }
1226 
1227   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1228     if (flag & SOR_ZERO_INITIAL_GUESS) {
1229       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1230       its--;
1231     }
1232 
1233     while (its--) {
1234       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236 
1237       /* update rhs: bb1 = bb - B*x */
1238       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1239       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1240 
1241       /* local sweep */
1242       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1243     }
1244   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1245     if (flag & SOR_ZERO_INITIAL_GUESS) {
1246       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1247       its--;
1248     }
1249     while (its--) {
1250       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1252 
1253       /* update rhs: bb1 = bb - B*x */
1254       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1255       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1256 
1257       /* local sweep */
1258       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1259     }
1260   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1261     if (flag & SOR_ZERO_INITIAL_GUESS) {
1262       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1263       its--;
1264     }
1265     while (its--) {
1266       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268 
1269       /* update rhs: bb1 = bb - B*x */
1270       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1271       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1272 
1273       /* local sweep */
1274       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1275     }
1276   }  else if (flag & SOR_EISENSTAT) {
1277     Vec         xx1;
1278 
1279     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1280     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1281 
1282     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284     if (!mat->diag) {
1285       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1286       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1287     }
1288     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1289     if (hasop) {
1290       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1291     } else {
1292       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1293     }
1294     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1295 
1296     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1297 
1298     /* local sweep */
1299     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1300     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1301     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1302   } else {
1303     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1304   }
1305 
1306   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatPermute_MPIAIJ"
1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1313 {
1314   MPI_Comm       comm,pcomm;
1315   PetscInt       first,local_size,nrows;
1316   const PetscInt *rows;
1317   PetscMPIInt    size;
1318   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1319   PetscErrorCode ierr;
1320 
1321   PetscFunctionBegin;
1322   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1323   /* make a collective version of 'rowp' */
1324   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1325   if (pcomm==comm) {
1326     crowp = rowp;
1327   } else {
1328     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1329     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1330     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1331     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1332   }
1333   /* collect the global row permutation and invert it */
1334   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1335   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1336   if (pcomm!=comm) {
1337     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1338   }
1339   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1340   /* get the local target indices */
1341   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1342   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1344   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1345   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1346   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1347   /* the column permutation is so much easier;
1348      make a local version of 'colp' and invert it */
1349   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1350   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1351   if (size==1) {
1352     lcolp = colp;
1353   } else {
1354     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1355     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1357   }
1358   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1359   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1360   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1361   if (size>1) {
1362     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1363     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1364   }
1365   /* now we just get the submatrix */
1366   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1367   /* clean up */
1368   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1369   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1376 {
1377   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1378   Mat            A = mat->A,B = mat->B;
1379   PetscErrorCode ierr;
1380   PetscReal      isend[5],irecv[5];
1381 
1382   PetscFunctionBegin;
1383   info->block_size     = 1.0;
1384   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1386   isend[3] = info->memory;  isend[4] = info->mallocs;
1387   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1388   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1389   isend[3] += info->memory;  isend[4] += info->mallocs;
1390   if (flag == MAT_LOCAL) {
1391     info->nz_used      = isend[0];
1392     info->nz_allocated = isend[1];
1393     info->nz_unneeded  = isend[2];
1394     info->memory       = isend[3];
1395     info->mallocs      = isend[4];
1396   } else if (flag == MAT_GLOBAL_MAX) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else if (flag == MAT_GLOBAL_SUM) {
1404     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1405     info->nz_used      = irecv[0];
1406     info->nz_allocated = irecv[1];
1407     info->nz_unneeded  = irecv[2];
1408     info->memory       = irecv[3];
1409     info->mallocs      = irecv[4];
1410   }
1411   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1412   info->fill_ratio_needed = 0;
1413   info->factor_mallocs    = 0;
1414 
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatSetOption_MPIAIJ"
1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1421 {
1422   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1423   PetscErrorCode ierr;
1424 
1425   PetscFunctionBegin;
1426   switch (op) {
1427   case MAT_NEW_NONZERO_LOCATIONS:
1428   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1429   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1430   case MAT_KEEP_NONZERO_PATTERN:
1431   case MAT_NEW_NONZERO_LOCATION_ERR:
1432   case MAT_USE_INODES:
1433   case MAT_IGNORE_ZERO_ENTRIES:
1434     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1435     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1436     break;
1437   case MAT_ROW_ORIENTED:
1438     a->roworiented = flg;
1439     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1440     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1441     break;
1442   case MAT_NEW_DIAGONALS:
1443     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1444     break;
1445   case MAT_IGNORE_OFF_PROC_ENTRIES:
1446     a->donotstash = PETSC_TRUE;
1447     break;
1448   case MAT_SYMMETRIC:
1449     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1450     break;
1451   case MAT_STRUCTURALLY_SYMMETRIC:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_HERMITIAN:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   case MAT_SYMMETRY_ETERNAL:
1458     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1459     break;
1460   default:
1461     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatGetRow_MPIAIJ"
1468 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1469 {
1470   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1471   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1472   PetscErrorCode ierr;
1473   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1474   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1475   PetscInt       *cmap,*idx_p;
1476 
1477   PetscFunctionBegin;
1478   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1479   mat->getrowactive = PETSC_TRUE;
1480 
1481   if (!mat->rowvalues && (idx || v)) {
1482     /*
1483         allocate enough space to hold information from the longest row.
1484     */
1485     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1486     PetscInt     max = 1,tmp;
1487     for (i=0; i<matin->rmap->n; i++) {
1488       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1489       if (max < tmp) { max = tmp; }
1490     }
1491     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1492     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1493   }
1494 
1495   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1496   lrow = row - rstart;
1497 
1498   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1499   if (!v)   {pvA = 0; pvB = 0;}
1500   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1501   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1502   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1503   nztot = nzA + nzB;
1504 
1505   cmap  = mat->garray;
1506   if (v  || idx) {
1507     if (nztot) {
1508       /* Sort by increasing column numbers, assuming A and B already sorted */
1509       PetscInt imark = -1;
1510       if (v) {
1511         *v = v_p = mat->rowvalues;
1512         for (i=0; i<nzB; i++) {
1513           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1514           else break;
1515         }
1516         imark = i;
1517         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1518         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1519       }
1520       if (idx) {
1521         *idx = idx_p = mat->rowindices;
1522         if (imark > -1) {
1523           for (i=0; i<imark; i++) {
1524             idx_p[i] = cmap[cworkB[i]];
1525           }
1526         } else {
1527           for (i=0; i<nzB; i++) {
1528             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1529             else break;
1530           }
1531           imark = i;
1532         }
1533         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1534         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1535       }
1536     } else {
1537       if (idx) *idx = 0;
1538       if (v)   *v   = 0;
1539     }
1540   }
1541   *nz = nztot;
1542   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1543   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1549 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1550 {
1551   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1552 
1553   PetscFunctionBegin;
1554   if (!aij->getrowactive) {
1555     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1556   }
1557   aij->getrowactive = PETSC_FALSE;
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatNorm_MPIAIJ"
1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1564 {
1565   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1566   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1567   PetscErrorCode ierr;
1568   PetscInt       i,j,cstart = mat->cmap->rstart;
1569   PetscReal      sum = 0.0;
1570   MatScalar      *v;
1571 
1572   PetscFunctionBegin;
1573   if (aij->size == 1) {
1574     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1575   } else {
1576     if (type == NORM_FROBENIUS) {
1577       v = amat->a;
1578       for (i=0; i<amat->nz; i++) {
1579 #if defined(PETSC_USE_COMPLEX)
1580         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1581 #else
1582         sum += (*v)*(*v); v++;
1583 #endif
1584       }
1585       v = bmat->a;
1586       for (i=0; i<bmat->nz; i++) {
1587 #if defined(PETSC_USE_COMPLEX)
1588         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1589 #else
1590         sum += (*v)*(*v); v++;
1591 #endif
1592       }
1593       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1594       *norm = sqrt(*norm);
1595     } else if (type == NORM_1) { /* max column norm */
1596       PetscReal *tmp,*tmp2;
1597       PetscInt  *jj,*garray = aij->garray;
1598       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1599       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1600       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1601       *norm = 0.0;
1602       v = amat->a; jj = amat->j;
1603       for (j=0; j<amat->nz; j++) {
1604         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1605       }
1606       v = bmat->a; jj = bmat->j;
1607       for (j=0; j<bmat->nz; j++) {
1608         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1609       }
1610       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1611       for (j=0; j<mat->cmap->N; j++) {
1612         if (tmp2[j] > *norm) *norm = tmp2[j];
1613       }
1614       ierr = PetscFree(tmp);CHKERRQ(ierr);
1615       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1616     } else if (type == NORM_INFINITY) { /* max row norm */
1617       PetscReal ntemp = 0.0;
1618       for (j=0; j<aij->A->rmap->n; j++) {
1619         v = amat->a + amat->i[j];
1620         sum = 0.0;
1621         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1622           sum += PetscAbsScalar(*v); v++;
1623         }
1624         v = bmat->a + bmat->i[j];
1625         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1626           sum += PetscAbsScalar(*v); v++;
1627         }
1628         if (sum > ntemp) ntemp = sum;
1629       }
1630       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1631     } else {
1632       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1633     }
1634   }
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatTranspose_MPIAIJ"
1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1641 {
1642   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1643   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1644   PetscErrorCode ierr;
1645   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1646   PetscInt       cstart=A->cmap->rstart,ncol;
1647   Mat            B;
1648   MatScalar      *array;
1649 
1650   PetscFunctionBegin;
1651   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1652 
1653   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1654   ai = Aloc->i; aj = Aloc->j;
1655   bi = Bloc->i; bj = Bloc->j;
1656   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1657     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1658     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1659     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1660     for (i=0; i<ai[ma]; i++){
1661       d_nnz[aj[i]] ++;
1662       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1663     }
1664 
1665     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1666     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1667     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1668     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1669     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1670   } else {
1671     B = *matout;
1672   }
1673 
1674   /* copy over the A part */
1675   array = Aloc->a;
1676   row = A->rmap->rstart;
1677   for (i=0; i<ma; i++) {
1678     ncol = ai[i+1]-ai[i];
1679     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1680     row++; array += ncol; aj += ncol;
1681   }
1682   aj = Aloc->j;
1683   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1684 
1685   /* copy over the B part */
1686   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1687   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1688   array = Bloc->a;
1689   row = A->rmap->rstart;
1690   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1691   cols_tmp = cols;
1692   for (i=0; i<mb; i++) {
1693     ncol = bi[i+1]-bi[i];
1694     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1695     row++; array += ncol; cols_tmp += ncol;
1696   }
1697   ierr = PetscFree(cols);CHKERRQ(ierr);
1698 
1699   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1702     *matout = B;
1703   } else {
1704     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1705   }
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1712 {
1713   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1714   Mat            a = aij->A,b = aij->B;
1715   PetscErrorCode ierr;
1716   PetscInt       s1,s2,s3;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1720   if (rr) {
1721     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1722     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1723     /* Overlap communication with computation. */
1724     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1725   }
1726   if (ll) {
1727     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1728     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1729     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1730   }
1731   /* scale  the diagonal block */
1732   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1733 
1734   if (rr) {
1735     /* Do a scatter end and then right scale the off-diagonal block */
1736     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1737     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1738   }
1739 
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1746 {
1747   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1748   PetscErrorCode ierr;
1749 
1750   PetscFunctionBegin;
1751   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1752   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1753   PetscFunctionReturn(0);
1754 }
1755 #undef __FUNCT__
1756 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1757 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1758 {
1759   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1760   PetscErrorCode ierr;
1761 
1762   PetscFunctionBegin;
1763   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1764   PetscFunctionReturn(0);
1765 }
1766 
1767 #undef __FUNCT__
1768 #define __FUNCT__ "MatEqual_MPIAIJ"
1769 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1770 {
1771   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1772   Mat            a,b,c,d;
1773   PetscTruth     flg;
1774   PetscErrorCode ierr;
1775 
1776   PetscFunctionBegin;
1777   a = matA->A; b = matA->B;
1778   c = matB->A; d = matB->B;
1779 
1780   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1781   if (flg) {
1782     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1783   }
1784   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1785   PetscFunctionReturn(0);
1786 }
1787 
1788 #undef __FUNCT__
1789 #define __FUNCT__ "MatCopy_MPIAIJ"
1790 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1791 {
1792   PetscErrorCode ierr;
1793   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1794   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1795 
1796   PetscFunctionBegin;
1797   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1798   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1799     /* because of the column compression in the off-processor part of the matrix a->B,
1800        the number of columns in a->B and b->B may be different, hence we cannot call
1801        the MatCopy() directly on the two parts. If need be, we can provide a more
1802        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1803        then copying the submatrices */
1804     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1805   } else {
1806     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1807     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1808   }
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1814 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1815 {
1816   PetscErrorCode ierr;
1817 
1818   PetscFunctionBegin;
1819   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 #include "petscblaslapack.h"
1824 #undef __FUNCT__
1825 #define __FUNCT__ "MatAXPY_MPIAIJ"
1826 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1827 {
1828   PetscErrorCode ierr;
1829   PetscInt       i;
1830   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1831   PetscBLASInt   bnz,one=1;
1832   Mat_SeqAIJ     *x,*y;
1833 
1834   PetscFunctionBegin;
1835   if (str == SAME_NONZERO_PATTERN) {
1836     PetscScalar alpha = a;
1837     x = (Mat_SeqAIJ *)xx->A->data;
1838     y = (Mat_SeqAIJ *)yy->A->data;
1839     bnz = PetscBLASIntCast(x->nz);
1840     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1841     x = (Mat_SeqAIJ *)xx->B->data;
1842     y = (Mat_SeqAIJ *)yy->B->data;
1843     bnz = PetscBLASIntCast(x->nz);
1844     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1845   } else if (str == SUBSET_NONZERO_PATTERN) {
1846     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1847 
1848     x = (Mat_SeqAIJ *)xx->B->data;
1849     y = (Mat_SeqAIJ *)yy->B->data;
1850     if (y->xtoy && y->XtoY != xx->B) {
1851       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1852       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1853     }
1854     if (!y->xtoy) { /* get xtoy */
1855       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1856       y->XtoY = xx->B;
1857       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1858     }
1859     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1860   } else {
1861     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1862   }
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1867 
1868 #undef __FUNCT__
1869 #define __FUNCT__ "MatConjugate_MPIAIJ"
1870 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1871 {
1872 #if defined(PETSC_USE_COMPLEX)
1873   PetscErrorCode ierr;
1874   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1875 
1876   PetscFunctionBegin;
1877   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1878   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1879 #else
1880   PetscFunctionBegin;
1881 #endif
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatRealPart_MPIAIJ"
1887 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1888 {
1889   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1890   PetscErrorCode ierr;
1891 
1892   PetscFunctionBegin;
1893   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1894   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1900 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1901 {
1902   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1903   PetscErrorCode ierr;
1904 
1905   PetscFunctionBegin;
1906   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1907   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 #ifdef PETSC_HAVE_PBGL
1912 
1913 #include <boost/parallel/mpi/bsp_process_group.hpp>
1914 #include <boost/graph/distributed/ilu_default_graph.hpp>
1915 #include <boost/graph/distributed/ilu_0_block.hpp>
1916 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1917 #include <boost/graph/distributed/petsc/interface.hpp>
1918 #include <boost/multi_array.hpp>
1919 #include <boost/parallel/distributed_property_map->hpp>
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1923 /*
1924   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1925 */
1926 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1927 {
1928   namespace petsc = boost::distributed::petsc;
1929 
1930   namespace graph_dist = boost::graph::distributed;
1931   using boost::graph::distributed::ilu_default::process_group_type;
1932   using boost::graph::ilu_permuted;
1933 
1934   PetscTruth      row_identity, col_identity;
1935   PetscContainer  c;
1936   PetscInt        m, n, M, N;
1937   PetscErrorCode  ierr;
1938 
1939   PetscFunctionBegin;
1940   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1941   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1942   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1943   if (!row_identity || !col_identity) {
1944     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1945   }
1946 
1947   process_group_type pg;
1948   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1949   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1950   lgraph_type&   level_graph = *lgraph_p;
1951   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1952 
1953   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1954   ilu_permuted(level_graph);
1955 
1956   /* put together the new matrix */
1957   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1958   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1959   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1960   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1961   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1962   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1963   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964 
1965   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1966   ierr = PetscContainerSetPointer(c, lgraph_p);
1967   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
1968   PetscFunctionReturn(0);
1969 }
1970 
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1973 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1974 {
1975   PetscFunctionBegin;
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatSolve_MPIAIJ"
1981 /*
1982   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1983 */
1984 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1985 {
1986   namespace graph_dist = boost::graph::distributed;
1987 
1988   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1989   lgraph_type*   lgraph_p;
1990   PetscContainer c;
1991   PetscErrorCode ierr;
1992 
1993   PetscFunctionBegin;
1994   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1995   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1996   ierr = VecCopy(b, x);CHKERRQ(ierr);
1997 
1998   PetscScalar* array_x;
1999   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2000   PetscInt sx;
2001   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2002 
2003   PetscScalar* array_b;
2004   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2005   PetscInt sb;
2006   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2007 
2008   lgraph_type&   level_graph = *lgraph_p;
2009   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2010 
2011   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2012   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2013                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2014 
2015   typedef boost::iterator_property_map<array_ref_type::iterator,
2016                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2017   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2018                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2019 
2020   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2021 
2022   PetscFunctionReturn(0);
2023 }
2024 #endif
2025 
2026 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2027   PetscInt       nzlocal,nsends,nrecvs;
2028   PetscMPIInt    *send_rank;
2029   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
2030   PetscScalar    *sbuf_a,**rbuf_a;
2031   PetscErrorCode (*MatDestroy)(Mat);
2032 } Mat_Redundant;
2033 
2034 #undef __FUNCT__
2035 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2036 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2037 {
2038   PetscErrorCode       ierr;
2039   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2040   PetscInt             i;
2041 
2042   PetscFunctionBegin;
2043   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2044   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2045   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2046   for (i=0; i<redund->nrecvs; i++){
2047     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2048     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2049   }
2050   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2051   ierr = PetscFree(redund);CHKERRQ(ierr);
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 #undef __FUNCT__
2056 #define __FUNCT__ "MatDestroy_MatRedundant"
2057 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2058 {
2059   PetscErrorCode  ierr;
2060   PetscContainer  container;
2061   Mat_Redundant   *redund=PETSC_NULL;
2062 
2063   PetscFunctionBegin;
2064   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2065   if (container) {
2066     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2067   } else {
2068     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2069   }
2070   A->ops->destroy = redund->MatDestroy;
2071   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2072   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2073   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2079 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2080 {
2081   PetscMPIInt    rank,size;
2082   MPI_Comm       comm=((PetscObject)mat)->comm;
2083   PetscErrorCode ierr;
2084   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2085   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2086   PetscInt       *rowrange=mat->rmap->range;
2087   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2088   Mat            A=aij->A,B=aij->B,C=*matredundant;
2089   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2090   PetscScalar    *sbuf_a;
2091   PetscInt       nzlocal=a->nz+b->nz;
2092   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2093   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2094   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2095   MatScalar      *aworkA,*aworkB;
2096   PetscScalar    *vals;
2097   PetscMPIInt    tag1,tag2,tag3,imdex;
2098   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2099                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2100   MPI_Status     recv_status,*send_status;
2101   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2102   PetscInt       **rbuf_j=PETSC_NULL;
2103   PetscScalar    **rbuf_a=PETSC_NULL;
2104   Mat_Redundant  *redund=PETSC_NULL;
2105   PetscContainer container;
2106 
2107   PetscFunctionBegin;
2108   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2109   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2110 
2111   if (reuse == MAT_REUSE_MATRIX) {
2112     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2113     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2114     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2115     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2116     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2117     if (container) {
2118       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2119     } else {
2120       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2121     }
2122     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2123 
2124     nsends    = redund->nsends;
2125     nrecvs    = redund->nrecvs;
2126     send_rank = redund->send_rank; recv_rank = send_rank + size;
2127     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2128     sbuf_j    = redund->sbuf_j;
2129     sbuf_a    = redund->sbuf_a;
2130     rbuf_j    = redund->rbuf_j;
2131     rbuf_a    = redund->rbuf_a;
2132   }
2133 
2134   if (reuse == MAT_INITIAL_MATRIX){
2135     PetscMPIInt  subrank,subsize;
2136     PetscInt     nleftover,np_subcomm;
2137     /* get the destination processors' id send_rank, nsends and nrecvs */
2138     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2139     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2140     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
2141     recv_rank = send_rank + size;
2142     np_subcomm = size/nsubcomm;
2143     nleftover  = size - nsubcomm*np_subcomm;
2144     nsends = 0; nrecvs = 0;
2145     for (i=0; i<size; i++){ /* i=rank*/
2146       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2147         send_rank[nsends] = i; nsends++;
2148         recv_rank[nrecvs++] = i;
2149       }
2150     }
2151     if (rank >= size - nleftover){/* this proc is a leftover processor */
2152       i = size-nleftover-1;
2153       j = 0;
2154       while (j < nsubcomm - nleftover){
2155         send_rank[nsends++] = i;
2156         i--; j++;
2157       }
2158     }
2159 
2160     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2161       for (i=0; i<nleftover; i++){
2162         recv_rank[nrecvs++] = size-nleftover+i;
2163       }
2164     }
2165 
2166     /* allocate sbuf_j, sbuf_a */
2167     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2168     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2169     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2170   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2171 
2172   /* copy mat's local entries into the buffers */
2173   if (reuse == MAT_INITIAL_MATRIX){
2174     rownz_max = 0;
2175     rptr = sbuf_j;
2176     cols = sbuf_j + rend-rstart + 1;
2177     vals = sbuf_a;
2178     rptr[0] = 0;
2179     for (i=0; i<rend-rstart; i++){
2180       row = i + rstart;
2181       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2182       ncols  = nzA + nzB;
2183       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2184       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2185       /* load the column indices for this row into cols */
2186       lwrite = 0;
2187       for (l=0; l<nzB; l++) {
2188         if ((ctmp = bmap[cworkB[l]]) < cstart){
2189           vals[lwrite]   = aworkB[l];
2190           cols[lwrite++] = ctmp;
2191         }
2192       }
2193       for (l=0; l<nzA; l++){
2194         vals[lwrite]   = aworkA[l];
2195         cols[lwrite++] = cstart + cworkA[l];
2196       }
2197       for (l=0; l<nzB; l++) {
2198         if ((ctmp = bmap[cworkB[l]]) >= cend){
2199           vals[lwrite]   = aworkB[l];
2200           cols[lwrite++] = ctmp;
2201         }
2202       }
2203       vals += ncols;
2204       cols += ncols;
2205       rptr[i+1] = rptr[i] + ncols;
2206       if (rownz_max < ncols) rownz_max = ncols;
2207     }
2208     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2209   } else { /* only copy matrix values into sbuf_a */
2210     rptr = sbuf_j;
2211     vals = sbuf_a;
2212     rptr[0] = 0;
2213     for (i=0; i<rend-rstart; i++){
2214       row = i + rstart;
2215       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2216       ncols  = nzA + nzB;
2217       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2218       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2219       lwrite = 0;
2220       for (l=0; l<nzB; l++) {
2221         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2222       }
2223       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2224       for (l=0; l<nzB; l++) {
2225         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2226       }
2227       vals += ncols;
2228       rptr[i+1] = rptr[i] + ncols;
2229     }
2230   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2231 
2232   /* send nzlocal to others, and recv other's nzlocal */
2233   /*--------------------------------------------------*/
2234   if (reuse == MAT_INITIAL_MATRIX){
2235     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2236     s_waits2 = s_waits3 + nsends;
2237     s_waits1 = s_waits2 + nsends;
2238     r_waits1 = s_waits1 + nsends;
2239     r_waits2 = r_waits1 + nrecvs;
2240     r_waits3 = r_waits2 + nrecvs;
2241   } else {
2242     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2243     r_waits3 = s_waits3 + nsends;
2244   }
2245 
2246   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2247   if (reuse == MAT_INITIAL_MATRIX){
2248     /* get new tags to keep the communication clean */
2249     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2250     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2251     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2252     rbuf_nz = sbuf_nz + nsends;
2253 
2254     /* post receives of other's nzlocal */
2255     for (i=0; i<nrecvs; i++){
2256       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2257     }
2258     /* send nzlocal to others */
2259     for (i=0; i<nsends; i++){
2260       sbuf_nz[i] = nzlocal;
2261       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2262     }
2263     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2264     count = nrecvs;
2265     while (count) {
2266       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2267       recv_rank[imdex] = recv_status.MPI_SOURCE;
2268       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2269       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2270 
2271       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2272       rbuf_nz[imdex] += i + 2;
2273       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2274       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2275       count--;
2276     }
2277     /* wait on sends of nzlocal */
2278     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2279     /* send mat->i,j to others, and recv from other's */
2280     /*------------------------------------------------*/
2281     for (i=0; i<nsends; i++){
2282       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2283       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2284     }
2285     /* wait on receives of mat->i,j */
2286     /*------------------------------*/
2287     count = nrecvs;
2288     while (count) {
2289       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2290       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2291       count--;
2292     }
2293     /* wait on sends of mat->i,j */
2294     /*---------------------------*/
2295     if (nsends) {
2296       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2297     }
2298   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2299 
2300   /* post receives, send and receive mat->a */
2301   /*----------------------------------------*/
2302   for (imdex=0; imdex<nrecvs; imdex++) {
2303     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2304   }
2305   for (i=0; i<nsends; i++){
2306     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2307   }
2308   count = nrecvs;
2309   while (count) {
2310     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2311     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2312     count--;
2313   }
2314   if (nsends) {
2315     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2316   }
2317 
2318   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2319 
2320   /* create redundant matrix */
2321   /*-------------------------*/
2322   if (reuse == MAT_INITIAL_MATRIX){
2323     /* compute rownz_max for preallocation */
2324     for (imdex=0; imdex<nrecvs; imdex++){
2325       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2326       rptr = rbuf_j[imdex];
2327       for (i=0; i<j; i++){
2328         ncols = rptr[i+1] - rptr[i];
2329         if (rownz_max < ncols) rownz_max = ncols;
2330       }
2331     }
2332 
2333     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2334     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2335     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2336     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2337     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2338   } else {
2339     C = *matredundant;
2340   }
2341 
2342   /* insert local matrix entries */
2343   rptr = sbuf_j;
2344   cols = sbuf_j + rend-rstart + 1;
2345   vals = sbuf_a;
2346   for (i=0; i<rend-rstart; i++){
2347     row   = i + rstart;
2348     ncols = rptr[i+1] - rptr[i];
2349     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2350     vals += ncols;
2351     cols += ncols;
2352   }
2353   /* insert received matrix entries */
2354   for (imdex=0; imdex<nrecvs; imdex++){
2355     rstart = rowrange[recv_rank[imdex]];
2356     rend   = rowrange[recv_rank[imdex]+1];
2357     rptr = rbuf_j[imdex];
2358     cols = rbuf_j[imdex] + rend-rstart + 1;
2359     vals = rbuf_a[imdex];
2360     for (i=0; i<rend-rstart; i++){
2361       row   = i + rstart;
2362       ncols = rptr[i+1] - rptr[i];
2363       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2364       vals += ncols;
2365       cols += ncols;
2366     }
2367   }
2368   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2369   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2371   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2372   if (reuse == MAT_INITIAL_MATRIX){
2373     PetscContainer container;
2374     *matredundant = C;
2375     /* create a supporting struct and attach it to C for reuse */
2376     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2377     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2378     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2379     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2380     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2381 
2382     redund->nzlocal = nzlocal;
2383     redund->nsends  = nsends;
2384     redund->nrecvs  = nrecvs;
2385     redund->send_rank = send_rank;
2386     redund->sbuf_nz = sbuf_nz;
2387     redund->sbuf_j  = sbuf_j;
2388     redund->sbuf_a  = sbuf_a;
2389     redund->rbuf_j  = rbuf_j;
2390     redund->rbuf_a  = rbuf_a;
2391 
2392     redund->MatDestroy = C->ops->destroy;
2393     C->ops->destroy    = MatDestroy_MatRedundant;
2394   }
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 #undef __FUNCT__
2399 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2400 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2401 {
2402   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2403   PetscErrorCode ierr;
2404   PetscInt       i,*idxb = 0;
2405   PetscScalar    *va,*vb;
2406   Vec            vtmp;
2407 
2408   PetscFunctionBegin;
2409   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2410   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2411   if (idx) {
2412     for (i=0; i<A->rmap->n; i++) {
2413       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2414     }
2415   }
2416 
2417   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2418   if (idx) {
2419     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2420   }
2421   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2422   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2423 
2424   for (i=0; i<A->rmap->n; i++){
2425     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2426       va[i] = vb[i];
2427       if (idx) idx[i] = a->garray[idxb[i]];
2428     }
2429   }
2430 
2431   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2432   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2433   if (idxb) {
2434     ierr = PetscFree(idxb);CHKERRQ(ierr);
2435   }
2436   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2442 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2443 {
2444   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2445   PetscErrorCode ierr;
2446   PetscInt       i,*idxb = 0;
2447   PetscScalar    *va,*vb;
2448   Vec            vtmp;
2449 
2450   PetscFunctionBegin;
2451   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2452   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2453   if (idx) {
2454     for (i=0; i<A->cmap->n; i++) {
2455       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2456     }
2457   }
2458 
2459   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2460   if (idx) {
2461     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2462   }
2463   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2464   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2465 
2466   for (i=0; i<A->rmap->n; i++){
2467     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2468       va[i] = vb[i];
2469       if (idx) idx[i] = a->garray[idxb[i]];
2470     }
2471   }
2472 
2473   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2474   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2475   if (idxb) {
2476     ierr = PetscFree(idxb);CHKERRQ(ierr);
2477   }
2478   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2479   PetscFunctionReturn(0);
2480 }
2481 
2482 #undef __FUNCT__
2483 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2484 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2485 {
2486   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2487   PetscInt       n      = A->rmap->n;
2488   PetscInt       cstart = A->cmap->rstart;
2489   PetscInt      *cmap   = mat->garray;
2490   PetscInt      *diagIdx, *offdiagIdx;
2491   Vec            diagV, offdiagV;
2492   PetscScalar   *a, *diagA, *offdiagA;
2493   PetscInt       r;
2494   PetscErrorCode ierr;
2495 
2496   PetscFunctionBegin;
2497   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2498   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2499   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2500   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2501   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2502   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2503   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2504   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2505   for(r = 0; r < n; ++r) {
2506     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2507       a[r]   = diagA[r];
2508       idx[r] = cstart + diagIdx[r];
2509     } else {
2510       a[r]   = offdiagA[r];
2511       idx[r] = cmap[offdiagIdx[r]];
2512     }
2513   }
2514   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2515   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2516   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2517   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2518   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2519   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 #undef __FUNCT__
2524 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2525 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2526 {
2527   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2528   PetscInt       n      = A->rmap->n;
2529   PetscInt       cstart = A->cmap->rstart;
2530   PetscInt      *cmap   = mat->garray;
2531   PetscInt      *diagIdx, *offdiagIdx;
2532   Vec            diagV, offdiagV;
2533   PetscScalar   *a, *diagA, *offdiagA;
2534   PetscInt       r;
2535   PetscErrorCode ierr;
2536 
2537   PetscFunctionBegin;
2538   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2539   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2540   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2541   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2542   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2543   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2544   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2545   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2546   for(r = 0; r < n; ++r) {
2547     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2548       a[r]   = diagA[r];
2549       idx[r] = cstart + diagIdx[r];
2550     } else {
2551       a[r]   = offdiagA[r];
2552       idx[r] = cmap[offdiagIdx[r]];
2553     }
2554   }
2555   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2556   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2557   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2558   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2559   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2560   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2566 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2567 {
2568   PetscErrorCode ierr;
2569   Mat            *dummy;
2570 
2571   PetscFunctionBegin;
2572   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2573   *newmat = *dummy;
2574   ierr = PetscFree(dummy);CHKERRQ(ierr);
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2579 /* -------------------------------------------------------------------*/
2580 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2581        MatGetRow_MPIAIJ,
2582        MatRestoreRow_MPIAIJ,
2583        MatMult_MPIAIJ,
2584 /* 4*/ MatMultAdd_MPIAIJ,
2585        MatMultTranspose_MPIAIJ,
2586        MatMultTransposeAdd_MPIAIJ,
2587 #ifdef PETSC_HAVE_PBGL
2588        MatSolve_MPIAIJ,
2589 #else
2590        0,
2591 #endif
2592        0,
2593        0,
2594 /*10*/ 0,
2595        0,
2596        0,
2597        MatSOR_MPIAIJ,
2598        MatTranspose_MPIAIJ,
2599 /*15*/ MatGetInfo_MPIAIJ,
2600        MatEqual_MPIAIJ,
2601        MatGetDiagonal_MPIAIJ,
2602        MatDiagonalScale_MPIAIJ,
2603        MatNorm_MPIAIJ,
2604 /*20*/ MatAssemblyBegin_MPIAIJ,
2605        MatAssemblyEnd_MPIAIJ,
2606        MatSetOption_MPIAIJ,
2607        MatZeroEntries_MPIAIJ,
2608 /*24*/ MatZeroRows_MPIAIJ,
2609        0,
2610 #ifdef PETSC_HAVE_PBGL
2611        0,
2612 #else
2613        0,
2614 #endif
2615        0,
2616        0,
2617 /*29*/ MatSetUpPreallocation_MPIAIJ,
2618 #ifdef PETSC_HAVE_PBGL
2619        0,
2620 #else
2621        0,
2622 #endif
2623        0,
2624        0,
2625        0,
2626 /*34*/ MatDuplicate_MPIAIJ,
2627        0,
2628        0,
2629        0,
2630        0,
2631 /*39*/ MatAXPY_MPIAIJ,
2632        MatGetSubMatrices_MPIAIJ,
2633        MatIncreaseOverlap_MPIAIJ,
2634        MatGetValues_MPIAIJ,
2635        MatCopy_MPIAIJ,
2636 /*44*/ MatGetRowMax_MPIAIJ,
2637        MatScale_MPIAIJ,
2638        0,
2639        0,
2640        0,
2641 /*49*/ MatSetBlockSize_MPIAIJ,
2642        0,
2643        0,
2644        0,
2645        0,
2646 /*54*/ MatFDColoringCreate_MPIAIJ,
2647        0,
2648        MatSetUnfactored_MPIAIJ,
2649        MatPermute_MPIAIJ,
2650        0,
2651 /*59*/ MatGetSubMatrix_MPIAIJ,
2652        MatDestroy_MPIAIJ,
2653        MatView_MPIAIJ,
2654        0,
2655        0,
2656 /*64*/ 0,
2657        0,
2658        0,
2659        0,
2660        0,
2661 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2662        MatGetRowMinAbs_MPIAIJ,
2663        0,
2664        MatSetColoring_MPIAIJ,
2665 #if defined(PETSC_HAVE_ADIC)
2666        MatSetValuesAdic_MPIAIJ,
2667 #else
2668        0,
2669 #endif
2670        MatSetValuesAdifor_MPIAIJ,
2671 /*75*/ MatFDColoringApply_AIJ,
2672        0,
2673        0,
2674        0,
2675        0,
2676 /*80*/ 0,
2677        0,
2678        0,
2679 /*83*/ MatLoad_MPIAIJ,
2680        0,
2681        0,
2682        0,
2683        0,
2684        0,
2685 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2686        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2687        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2688        MatPtAP_Basic,
2689        MatPtAPSymbolic_MPIAIJ,
2690 /*94*/ MatPtAPNumeric_MPIAIJ,
2691        0,
2692        0,
2693        0,
2694        0,
2695 /*99*/ 0,
2696        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2697        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2698        MatConjugate_MPIAIJ,
2699        0,
2700 /*104*/MatSetValuesRow_MPIAIJ,
2701        MatRealPart_MPIAIJ,
2702        MatImaginaryPart_MPIAIJ,
2703        0,
2704        0,
2705 /*109*/0,
2706        MatGetRedundantMatrix_MPIAIJ,
2707        MatGetRowMin_MPIAIJ,
2708        0,
2709        0,
2710 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2711        0,
2712        0,
2713        0,
2714        0,
2715        0
2716 };
2717 
2718 /* ----------------------------------------------------------------------------------------*/
2719 
2720 EXTERN_C_BEGIN
2721 #undef __FUNCT__
2722 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2723 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2724 {
2725   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2726   PetscErrorCode ierr;
2727 
2728   PetscFunctionBegin;
2729   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2730   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2731   PetscFunctionReturn(0);
2732 }
2733 EXTERN_C_END
2734 
2735 EXTERN_C_BEGIN
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2738 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2739 {
2740   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2741   PetscErrorCode ierr;
2742 
2743   PetscFunctionBegin;
2744   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2745   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2746   PetscFunctionReturn(0);
2747 }
2748 EXTERN_C_END
2749 
2750 #include "petscpc.h"
2751 EXTERN_C_BEGIN
2752 #undef __FUNCT__
2753 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2754 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2755 {
2756   Mat_MPIAIJ     *b;
2757   PetscErrorCode ierr;
2758   PetscInt       i;
2759 
2760   PetscFunctionBegin;
2761   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2762   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2763   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2764   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2765 
2766   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2767   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2768   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2769   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2770   if (d_nnz) {
2771     for (i=0; i<B->rmap->n; i++) {
2772       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2773     }
2774   }
2775   if (o_nnz) {
2776     for (i=0; i<B->rmap->n; i++) {
2777       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2778     }
2779   }
2780   b = (Mat_MPIAIJ*)B->data;
2781 
2782   if (!B->preallocated) {
2783     /* Explicitly create 2 MATSEQAIJ matrices. */
2784     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2785     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2786     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2787     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2788     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2789     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2790     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2791     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2792   }
2793 
2794   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2795   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2796   B->preallocated = PETSC_TRUE;
2797   PetscFunctionReturn(0);
2798 }
2799 EXTERN_C_END
2800 
2801 #undef __FUNCT__
2802 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2803 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2804 {
2805   Mat            mat;
2806   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2807   PetscErrorCode ierr;
2808 
2809   PetscFunctionBegin;
2810   *newmat       = 0;
2811   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2812   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2813   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2814   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2815   a    = (Mat_MPIAIJ*)mat->data;
2816 
2817   mat->factor       = matin->factor;
2818   mat->rmap->bs      = matin->rmap->bs;
2819   mat->assembled    = PETSC_TRUE;
2820   mat->insertmode   = NOT_SET_VALUES;
2821   mat->preallocated = PETSC_TRUE;
2822 
2823   a->size           = oldmat->size;
2824   a->rank           = oldmat->rank;
2825   a->donotstash     = oldmat->donotstash;
2826   a->roworiented    = oldmat->roworiented;
2827   a->rowindices     = 0;
2828   a->rowvalues      = 0;
2829   a->getrowactive   = PETSC_FALSE;
2830 
2831   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2832   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2833 
2834   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2835   if (oldmat->colmap) {
2836 #if defined (PETSC_USE_CTABLE)
2837     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2838 #else
2839     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2840     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2841     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2842 #endif
2843   } else a->colmap = 0;
2844   if (oldmat->garray) {
2845     PetscInt len;
2846     len  = oldmat->B->cmap->n;
2847     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2848     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2849     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2850   } else a->garray = 0;
2851 
2852   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2853   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2854   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2855   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2856   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2857   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2858   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2859   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2860   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2861   *newmat = mat;
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 #include "petscsys.h"
2866 
2867 #undef __FUNCT__
2868 #define __FUNCT__ "MatLoad_MPIAIJ"
2869 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2870 {
2871   Mat            A;
2872   PetscScalar    *vals,*svals;
2873   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2874   MPI_Status     status;
2875   PetscErrorCode ierr;
2876   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2877   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2878   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2879   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2880   PetscInt       cend,cstart,n,*rowners;
2881   int            fd;
2882 
2883   PetscFunctionBegin;
2884   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2885   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2886   if (!rank) {
2887     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2888     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2889     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2890   }
2891 
2892   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2893   M = header[1]; N = header[2];
2894   /* determine ownership of all rows */
2895   m    = M/size + ((M % size) > rank);
2896   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2897   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2898 
2899   /* First process needs enough room for process with most rows */
2900   if (!rank) {
2901     mmax       = rowners[1];
2902     for (i=2; i<size; i++) {
2903       mmax = PetscMax(mmax,rowners[i]);
2904     }
2905   } else mmax = m;
2906 
2907   rowners[0] = 0;
2908   for (i=2; i<=size; i++) {
2909     rowners[i] += rowners[i-1];
2910   }
2911   rstart = rowners[rank];
2912   rend   = rowners[rank+1];
2913 
2914   /* distribute row lengths to all processors */
2915   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2916   if (!rank) {
2917     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2918     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2919     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2920     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2921     for (j=0; j<m; j++) {
2922       procsnz[0] += ourlens[j];
2923     }
2924     for (i=1; i<size; i++) {
2925       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2926       /* calculate the number of nonzeros on each processor */
2927       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2928         procsnz[i] += rowlengths[j];
2929       }
2930       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2931       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2932     }
2933     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2934   } else {
2935     mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr);
2936     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2937   }
2938 
2939   if (!rank) {
2940     /* determine max buffer needed and allocate it */
2941     maxnz = 0;
2942     for (i=0; i<size; i++) {
2943       maxnz = PetscMax(maxnz,procsnz[i]);
2944     }
2945     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2946 
2947     /* read in my part of the matrix column indices  */
2948     nz   = procsnz[0];
2949     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2950     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2951 
2952     /* read in every one elses and ship off */
2953     for (i=1; i<size; i++) {
2954       nz     = procsnz[i];
2955       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2956       mpicnt = PetscMPIIntCast(nz);
2957       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2958     }
2959     ierr = PetscFree(cols);CHKERRQ(ierr);
2960   } else {
2961     /* determine buffer space needed for message */
2962     nz = 0;
2963     for (i=0; i<m; i++) {
2964       nz += ourlens[i];
2965     }
2966     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2967 
2968     /* receive message of column indices*/
2969     mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr);
2970     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2971     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2972     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2973     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2974     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2975   }
2976 
2977   /* determine column ownership if matrix is not square */
2978   if (N != M) {
2979     n      = N/size + ((N % size) > rank);
2980     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2981     cstart = cend - n;
2982   } else {
2983     cstart = rstart;
2984     cend   = rend;
2985     n      = cend - cstart;
2986   }
2987 
2988   /* loop over local rows, determining number of off diagonal entries */
2989   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2990   jj = 0;
2991   for (i=0; i<m; i++) {
2992     for (j=0; j<ourlens[i]; j++) {
2993       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2994       jj++;
2995     }
2996   }
2997 
2998   /* create our matrix */
2999   for (i=0; i<m; i++) {
3000     ourlens[i] -= offlens[i];
3001   }
3002   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3003   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
3004   ierr = MatSetType(A,type);CHKERRQ(ierr);
3005   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3006 
3007   for (i=0; i<m; i++) {
3008     ourlens[i] += offlens[i];
3009   }
3010 
3011   if (!rank) {
3012     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3013 
3014     /* read in my part of the matrix numerical values  */
3015     nz   = procsnz[0];
3016     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3017 
3018     /* insert into matrix */
3019     jj      = rstart;
3020     smycols = mycols;
3021     svals   = vals;
3022     for (i=0; i<m; i++) {
3023       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3024       smycols += ourlens[i];
3025       svals   += ourlens[i];
3026       jj++;
3027     }
3028 
3029     /* read in other processors and ship out */
3030     for (i=1; i<size; i++) {
3031       nz     = procsnz[i];
3032       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3033       mpicnt = PetscMPIIntCast(nz);
3034       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3035     }
3036     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3037   } else {
3038     /* receive numeric values */
3039     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3040 
3041     /* receive message of values*/
3042     mpicnt = PetscMPIIntCast(nz);
3043     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3044     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3045     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3046     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3047     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3048 
3049     /* insert into matrix */
3050     jj      = rstart;
3051     smycols = mycols;
3052     svals   = vals;
3053     for (i=0; i<m; i++) {
3054       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3055       smycols += ourlens[i];
3056       svals   += ourlens[i];
3057       jj++;
3058     }
3059   }
3060   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3061   ierr = PetscFree(vals);CHKERRQ(ierr);
3062   ierr = PetscFree(mycols);CHKERRQ(ierr);
3063   ierr = PetscFree(rowners);CHKERRQ(ierr);
3064 
3065   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3066   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3067   *newmat = A;
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3073 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3074 {
3075   PetscErrorCode ierr;
3076   IS             iscol_local;
3077   PetscInt       csize;
3078 
3079   PetscFunctionBegin;
3080   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3081   if (call == MAT_REUSE_MATRIX) {
3082     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3083     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3084   } else {
3085     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3086   }
3087   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3088   if (call == MAT_INITIAL_MATRIX) {
3089     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3090     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3091   }
3092   PetscFunctionReturn(0);
3093 }
3094 
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3097 /*
3098     Not great since it makes two copies of the submatrix, first an SeqAIJ
3099   in local and then by concatenating the local matrices the end result.
3100   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3101 
3102   Note: This requires a sequential iscol with all indices.
3103 */
3104 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3105 {
3106   PetscErrorCode ierr;
3107   PetscMPIInt    rank,size;
3108   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3109   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3110   Mat            *local,M,Mreuse;
3111   MatScalar      *vwork,*aa;
3112   MPI_Comm       comm = ((PetscObject)mat)->comm;
3113   Mat_SeqAIJ     *aij;
3114 
3115 
3116   PetscFunctionBegin;
3117   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3118   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3119 
3120   if (call ==  MAT_REUSE_MATRIX) {
3121     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3122     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3123     local = &Mreuse;
3124     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3125   } else {
3126     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3127     Mreuse = *local;
3128     ierr   = PetscFree(local);CHKERRQ(ierr);
3129   }
3130 
3131   /*
3132       m - number of local rows
3133       n - number of columns (same on all processors)
3134       rstart - first row in new global matrix generated
3135   */
3136   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3137   if (call == MAT_INITIAL_MATRIX) {
3138     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3139     ii  = aij->i;
3140     jj  = aij->j;
3141 
3142     /*
3143         Determine the number of non-zeros in the diagonal and off-diagonal
3144         portions of the matrix in order to do correct preallocation
3145     */
3146 
3147     /* first get start and end of "diagonal" columns */
3148     if (csize == PETSC_DECIDE) {
3149       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3150       if (mglobal == n) { /* square matrix */
3151 	nlocal = m;
3152       } else {
3153         nlocal = n/size + ((n % size) > rank);
3154       }
3155     } else {
3156       nlocal = csize;
3157     }
3158     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3159     rstart = rend - nlocal;
3160     if (rank == size - 1 && rend != n) {
3161       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3162     }
3163 
3164     /* next, compute all the lengths */
3165     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3166     olens = dlens + m;
3167     for (i=0; i<m; i++) {
3168       jend = ii[i+1] - ii[i];
3169       olen = 0;
3170       dlen = 0;
3171       for (j=0; j<jend; j++) {
3172         if (*jj < rstart || *jj >= rend) olen++;
3173         else dlen++;
3174         jj++;
3175       }
3176       olens[i] = olen;
3177       dlens[i] = dlen;
3178     }
3179     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3180     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3181     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3182     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3183     ierr = PetscFree(dlens);CHKERRQ(ierr);
3184   } else {
3185     PetscInt ml,nl;
3186 
3187     M = *newmat;
3188     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3189     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3190     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3191     /*
3192          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3193        rather than the slower MatSetValues().
3194     */
3195     M->was_assembled = PETSC_TRUE;
3196     M->assembled     = PETSC_FALSE;
3197   }
3198   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3199   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3200   ii  = aij->i;
3201   jj  = aij->j;
3202   aa  = aij->a;
3203   for (i=0; i<m; i++) {
3204     row   = rstart + i;
3205     nz    = ii[i+1] - ii[i];
3206     cwork = jj;     jj += nz;
3207     vwork = aa;     aa += nz;
3208     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3209   }
3210 
3211   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3212   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3213   *newmat = M;
3214 
3215   /* save submatrix used in processor for next request */
3216   if (call ==  MAT_INITIAL_MATRIX) {
3217     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3218     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3219   }
3220 
3221   PetscFunctionReturn(0);
3222 }
3223 
3224 EXTERN_C_BEGIN
3225 #undef __FUNCT__
3226 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3227 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3228 {
3229   PetscInt       m,cstart, cend,j,nnz,i,d;
3230   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3231   const PetscInt *JJ;
3232   PetscScalar    *values;
3233   PetscErrorCode ierr;
3234 
3235   PetscFunctionBegin;
3236   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3237 
3238   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3239   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3240   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3241   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3242   m      = B->rmap->n;
3243   cstart = B->cmap->rstart;
3244   cend   = B->cmap->rend;
3245   rstart = B->rmap->rstart;
3246 
3247   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3248   o_nnz = d_nnz + m;
3249 
3250 #if defined(PETSC_USE_DEBUGGING)
3251   for (i=0; i<m; i++) {
3252     nnz     = Ii[i+1]- Ii[i];
3253     JJ      = J + Ii[i];
3254     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3255     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3256     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3257   }
3258 #endif
3259 
3260   for (i=0; i<m; i++) {
3261     nnz     = Ii[i+1]- Ii[i];
3262     JJ      = J + Ii[i];
3263     nnz_max = PetscMax(nnz_max,nnz);
3264     d       = 0;
3265     for (j=0; j<nnz; j++) {
3266       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3267     }
3268     d_nnz[i] = d;
3269     o_nnz[i] = nnz - d;
3270   }
3271   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3272   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3273 
3274   if (v) values = (PetscScalar*)v;
3275   else {
3276     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3277     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3278   }
3279 
3280   for (i=0; i<m; i++) {
3281     ii   = i + rstart;
3282     nnz  = Ii[i+1]- Ii[i];
3283     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3284   }
3285   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3286   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3287 
3288   if (!v) {
3289     ierr = PetscFree(values);CHKERRQ(ierr);
3290   }
3291   PetscFunctionReturn(0);
3292 }
3293 EXTERN_C_END
3294 
3295 #undef __FUNCT__
3296 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3297 /*@
3298    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3299    (the default parallel PETSc format).
3300 
3301    Collective on MPI_Comm
3302 
3303    Input Parameters:
3304 +  B - the matrix
3305 .  i - the indices into j for the start of each local row (starts with zero)
3306 .  j - the column indices for each local row (starts with zero)
3307 -  v - optional values in the matrix
3308 
3309    Level: developer
3310 
3311    Notes:
3312        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3313      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3314      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3315 
3316        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3317 
3318        The format which is used for the sparse matrix input, is equivalent to a
3319     row-major ordering.. i.e for the following matrix, the input data expected is
3320     as shown:
3321 
3322         1 0 0
3323         2 0 3     P0
3324        -------
3325         4 5 6     P1
3326 
3327      Process0 [P0]: rows_owned=[0,1]
3328         i =  {0,1,3}  [size = nrow+1  = 2+1]
3329         j =  {0,0,2}  [size = nz = 6]
3330         v =  {1,2,3}  [size = nz = 6]
3331 
3332      Process1 [P1]: rows_owned=[2]
3333         i =  {0,3}    [size = nrow+1  = 1+1]
3334         j =  {0,1,2}  [size = nz = 6]
3335         v =  {4,5,6}  [size = nz = 6]
3336 
3337 .keywords: matrix, aij, compressed row, sparse, parallel
3338 
3339 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3340           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3341 @*/
3342 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3343 {
3344   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3345 
3346   PetscFunctionBegin;
3347   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3348   if (f) {
3349     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3350   }
3351   PetscFunctionReturn(0);
3352 }
3353 
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3356 /*@C
3357    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3358    (the default parallel PETSc format).  For good matrix assembly performance
3359    the user should preallocate the matrix storage by setting the parameters
3360    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3361    performance can be increased by more than a factor of 50.
3362 
3363    Collective on MPI_Comm
3364 
3365    Input Parameters:
3366 +  A - the matrix
3367 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3368            (same value is used for all local rows)
3369 .  d_nnz - array containing the number of nonzeros in the various rows of the
3370            DIAGONAL portion of the local submatrix (possibly different for each row)
3371            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3372            The size of this array is equal to the number of local rows, i.e 'm'.
3373            You must leave room for the diagonal entry even if it is zero.
3374 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3375            submatrix (same value is used for all local rows).
3376 -  o_nnz - array containing the number of nonzeros in the various rows of the
3377            OFF-DIAGONAL portion of the local submatrix (possibly different for
3378            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3379            structure. The size of this array is equal to the number
3380            of local rows, i.e 'm'.
3381 
3382    If the *_nnz parameter is given then the *_nz parameter is ignored
3383 
3384    The AIJ format (also called the Yale sparse matrix format or
3385    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3386    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3387 
3388    The parallel matrix is partitioned such that the first m0 rows belong to
3389    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3390    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3391 
3392    The DIAGONAL portion of the local submatrix of a processor can be defined
3393    as the submatrix which is obtained by extraction the part corresponding
3394    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3395    first row that belongs to the processor, and r2 is the last row belonging
3396    to the this processor. This is a square mxm matrix. The remaining portion
3397    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3398 
3399    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3400 
3401    You can call MatGetInfo() to get information on how effective the preallocation was;
3402    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3403    You can also run with the option -info and look for messages with the string
3404    malloc in them to see if additional memory allocation was needed.
3405 
3406    Example usage:
3407 
3408    Consider the following 8x8 matrix with 34 non-zero values, that is
3409    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3410    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3411    as follows:
3412 
3413 .vb
3414             1  2  0  |  0  3  0  |  0  4
3415     Proc0   0  5  6  |  7  0  0  |  8  0
3416             9  0 10  | 11  0  0  | 12  0
3417     -------------------------------------
3418            13  0 14  | 15 16 17  |  0  0
3419     Proc1   0 18  0  | 19 20 21  |  0  0
3420             0  0  0  | 22 23  0  | 24  0
3421     -------------------------------------
3422     Proc2  25 26 27  |  0  0 28  | 29  0
3423            30  0  0  | 31 32 33  |  0 34
3424 .ve
3425 
3426    This can be represented as a collection of submatrices as:
3427 
3428 .vb
3429       A B C
3430       D E F
3431       G H I
3432 .ve
3433 
3434    Where the submatrices A,B,C are owned by proc0, D,E,F are
3435    owned by proc1, G,H,I are owned by proc2.
3436 
3437    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3438    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3439    The 'M','N' parameters are 8,8, and have the same values on all procs.
3440 
3441    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3442    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3443    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3444    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3445    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3446    matrix, ans [DF] as another SeqAIJ matrix.
3447 
3448    When d_nz, o_nz parameters are specified, d_nz storage elements are
3449    allocated for every row of the local diagonal submatrix, and o_nz
3450    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3451    One way to choose d_nz and o_nz is to use the max nonzerors per local
3452    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3453    In this case, the values of d_nz,o_nz are:
3454 .vb
3455      proc0 : dnz = 2, o_nz = 2
3456      proc1 : dnz = 3, o_nz = 2
3457      proc2 : dnz = 1, o_nz = 4
3458 .ve
3459    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3460    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3461    for proc3. i.e we are using 12+15+10=37 storage locations to store
3462    34 values.
3463 
3464    When d_nnz, o_nnz parameters are specified, the storage is specified
3465    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3466    In the above case the values for d_nnz,o_nnz are:
3467 .vb
3468      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3469      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3470      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3471 .ve
3472    Here the space allocated is sum of all the above values i.e 34, and
3473    hence pre-allocation is perfect.
3474 
3475    Level: intermediate
3476 
3477 .keywords: matrix, aij, compressed row, sparse, parallel
3478 
3479 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3480           MPIAIJ, MatGetInfo()
3481 @*/
3482 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3483 {
3484   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3485 
3486   PetscFunctionBegin;
3487   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3488   if (f) {
3489     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3490   }
3491   PetscFunctionReturn(0);
3492 }
3493 
3494 #undef __FUNCT__
3495 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3496 /*@
3497      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3498          CSR format the local rows.
3499 
3500    Collective on MPI_Comm
3501 
3502    Input Parameters:
3503 +  comm - MPI communicator
3504 .  m - number of local rows (Cannot be PETSC_DECIDE)
3505 .  n - This value should be the same as the local size used in creating the
3506        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3507        calculated if N is given) For square matrices n is almost always m.
3508 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3509 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3510 .   i - row indices
3511 .   j - column indices
3512 -   a - matrix values
3513 
3514    Output Parameter:
3515 .   mat - the matrix
3516 
3517    Level: intermediate
3518 
3519    Notes:
3520        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3521      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3522      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3523 
3524        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3525 
3526        The format which is used for the sparse matrix input, is equivalent to a
3527     row-major ordering.. i.e for the following matrix, the input data expected is
3528     as shown:
3529 
3530         1 0 0
3531         2 0 3     P0
3532        -------
3533         4 5 6     P1
3534 
3535      Process0 [P0]: rows_owned=[0,1]
3536         i =  {0,1,3}  [size = nrow+1  = 2+1]
3537         j =  {0,0,2}  [size = nz = 6]
3538         v =  {1,2,3}  [size = nz = 6]
3539 
3540      Process1 [P1]: rows_owned=[2]
3541         i =  {0,3}    [size = nrow+1  = 1+1]
3542         j =  {0,1,2}  [size = nz = 6]
3543         v =  {4,5,6}  [size = nz = 6]
3544 
3545 .keywords: matrix, aij, compressed row, sparse, parallel
3546 
3547 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3548           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3549 @*/
3550 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3551 {
3552   PetscErrorCode ierr;
3553 
3554  PetscFunctionBegin;
3555   if (i[0]) {
3556     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3557   }
3558   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3559   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3560   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3561   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3562   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3563   PetscFunctionReturn(0);
3564 }
3565 
3566 #undef __FUNCT__
3567 #define __FUNCT__ "MatCreateMPIAIJ"
3568 /*@C
3569    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3570    (the default parallel PETSc format).  For good matrix assembly performance
3571    the user should preallocate the matrix storage by setting the parameters
3572    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3573    performance can be increased by more than a factor of 50.
3574 
3575    Collective on MPI_Comm
3576 
3577    Input Parameters:
3578 +  comm - MPI communicator
3579 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3580            This value should be the same as the local size used in creating the
3581            y vector for the matrix-vector product y = Ax.
3582 .  n - This value should be the same as the local size used in creating the
3583        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3584        calculated if N is given) For square matrices n is almost always m.
3585 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3586 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3587 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3588            (same value is used for all local rows)
3589 .  d_nnz - array containing the number of nonzeros in the various rows of the
3590            DIAGONAL portion of the local submatrix (possibly different for each row)
3591            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3592            The size of this array is equal to the number of local rows, i.e 'm'.
3593            You must leave room for the diagonal entry even if it is zero.
3594 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3595            submatrix (same value is used for all local rows).
3596 -  o_nnz - array containing the number of nonzeros in the various rows of the
3597            OFF-DIAGONAL portion of the local submatrix (possibly different for
3598            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3599            structure. The size of this array is equal to the number
3600            of local rows, i.e 'm'.
3601 
3602    Output Parameter:
3603 .  A - the matrix
3604 
3605    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3606    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3607    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3608 
3609    Notes:
3610    If the *_nnz parameter is given then the *_nz parameter is ignored
3611 
3612    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3613    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3614    storage requirements for this matrix.
3615 
3616    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3617    processor than it must be used on all processors that share the object for
3618    that argument.
3619 
3620    The user MUST specify either the local or global matrix dimensions
3621    (possibly both).
3622 
3623    The parallel matrix is partitioned across processors such that the
3624    first m0 rows belong to process 0, the next m1 rows belong to
3625    process 1, the next m2 rows belong to process 2 etc.. where
3626    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3627    values corresponding to [m x N] submatrix.
3628 
3629    The columns are logically partitioned with the n0 columns belonging
3630    to 0th partition, the next n1 columns belonging to the next
3631    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3632 
3633    The DIAGONAL portion of the local submatrix on any given processor
3634    is the submatrix corresponding to the rows and columns m,n
3635    corresponding to the given processor. i.e diagonal matrix on
3636    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3637    etc. The remaining portion of the local submatrix [m x (N-n)]
3638    constitute the OFF-DIAGONAL portion. The example below better
3639    illustrates this concept.
3640 
3641    For a square global matrix we define each processor's diagonal portion
3642    to be its local rows and the corresponding columns (a square submatrix);
3643    each processor's off-diagonal portion encompasses the remainder of the
3644    local matrix (a rectangular submatrix).
3645 
3646    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3647 
3648    When calling this routine with a single process communicator, a matrix of
3649    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3650    type of communicator, use the construction mechanism:
3651      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3652 
3653    By default, this format uses inodes (identical nodes) when possible.
3654    We search for consecutive rows with the same nonzero structure, thereby
3655    reusing matrix information to achieve increased efficiency.
3656 
3657    Options Database Keys:
3658 +  -mat_no_inode  - Do not use inodes
3659 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3660 -  -mat_aij_oneindex - Internally use indexing starting at 1
3661         rather than 0.  Note that when calling MatSetValues(),
3662         the user still MUST index entries starting at 0!
3663 
3664 
3665    Example usage:
3666 
3667    Consider the following 8x8 matrix with 34 non-zero values, that is
3668    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3669    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3670    as follows:
3671 
3672 .vb
3673             1  2  0  |  0  3  0  |  0  4
3674     Proc0   0  5  6  |  7  0  0  |  8  0
3675             9  0 10  | 11  0  0  | 12  0
3676     -------------------------------------
3677            13  0 14  | 15 16 17  |  0  0
3678     Proc1   0 18  0  | 19 20 21  |  0  0
3679             0  0  0  | 22 23  0  | 24  0
3680     -------------------------------------
3681     Proc2  25 26 27  |  0  0 28  | 29  0
3682            30  0  0  | 31 32 33  |  0 34
3683 .ve
3684 
3685    This can be represented as a collection of submatrices as:
3686 
3687 .vb
3688       A B C
3689       D E F
3690       G H I
3691 .ve
3692 
3693    Where the submatrices A,B,C are owned by proc0, D,E,F are
3694    owned by proc1, G,H,I are owned by proc2.
3695 
3696    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3697    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3698    The 'M','N' parameters are 8,8, and have the same values on all procs.
3699 
3700    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3701    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3702    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3703    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3704    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3705    matrix, ans [DF] as another SeqAIJ matrix.
3706 
3707    When d_nz, o_nz parameters are specified, d_nz storage elements are
3708    allocated for every row of the local diagonal submatrix, and o_nz
3709    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3710    One way to choose d_nz and o_nz is to use the max nonzerors per local
3711    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3712    In this case, the values of d_nz,o_nz are:
3713 .vb
3714      proc0 : dnz = 2, o_nz = 2
3715      proc1 : dnz = 3, o_nz = 2
3716      proc2 : dnz = 1, o_nz = 4
3717 .ve
3718    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3719    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3720    for proc3. i.e we are using 12+15+10=37 storage locations to store
3721    34 values.
3722 
3723    When d_nnz, o_nnz parameters are specified, the storage is specified
3724    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3725    In the above case the values for d_nnz,o_nnz are:
3726 .vb
3727      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3728      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3729      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3730 .ve
3731    Here the space allocated is sum of all the above values i.e 34, and
3732    hence pre-allocation is perfect.
3733 
3734    Level: intermediate
3735 
3736 .keywords: matrix, aij, compressed row, sparse, parallel
3737 
3738 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3739           MPIAIJ, MatCreateMPIAIJWithArrays()
3740 @*/
3741 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3742 {
3743   PetscErrorCode ierr;
3744   PetscMPIInt    size;
3745 
3746   PetscFunctionBegin;
3747   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3748   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3749   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3750   if (size > 1) {
3751     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3752     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3753   } else {
3754     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3755     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3756   }
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3762 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3763 {
3764   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3765 
3766   PetscFunctionBegin;
3767   *Ad     = a->A;
3768   *Ao     = a->B;
3769   *colmap = a->garray;
3770   PetscFunctionReturn(0);
3771 }
3772 
3773 #undef __FUNCT__
3774 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3775 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3776 {
3777   PetscErrorCode ierr;
3778   PetscInt       i;
3779   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3780 
3781   PetscFunctionBegin;
3782   if (coloring->ctype == IS_COLORING_GLOBAL) {
3783     ISColoringValue *allcolors,*colors;
3784     ISColoring      ocoloring;
3785 
3786     /* set coloring for diagonal portion */
3787     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3788 
3789     /* set coloring for off-diagonal portion */
3790     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3791     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3792     for (i=0; i<a->B->cmap->n; i++) {
3793       colors[i] = allcolors[a->garray[i]];
3794     }
3795     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3796     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3797     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3798     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3799   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3800     ISColoringValue *colors;
3801     PetscInt        *larray;
3802     ISColoring      ocoloring;
3803 
3804     /* set coloring for diagonal portion */
3805     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3806     for (i=0; i<a->A->cmap->n; i++) {
3807       larray[i] = i + A->cmap->rstart;
3808     }
3809     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3810     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3811     for (i=0; i<a->A->cmap->n; i++) {
3812       colors[i] = coloring->colors[larray[i]];
3813     }
3814     ierr = PetscFree(larray);CHKERRQ(ierr);
3815     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3816     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3817     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3818 
3819     /* set coloring for off-diagonal portion */
3820     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3821     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3822     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3823     for (i=0; i<a->B->cmap->n; i++) {
3824       colors[i] = coloring->colors[larray[i]];
3825     }
3826     ierr = PetscFree(larray);CHKERRQ(ierr);
3827     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3828     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3829     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3830   } else {
3831     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3832   }
3833 
3834   PetscFunctionReturn(0);
3835 }
3836 
3837 #if defined(PETSC_HAVE_ADIC)
3838 #undef __FUNCT__
3839 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3840 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3841 {
3842   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3843   PetscErrorCode ierr;
3844 
3845   PetscFunctionBegin;
3846   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3847   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3848   PetscFunctionReturn(0);
3849 }
3850 #endif
3851 
3852 #undef __FUNCT__
3853 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3854 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3855 {
3856   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3857   PetscErrorCode ierr;
3858 
3859   PetscFunctionBegin;
3860   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3861   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3862   PetscFunctionReturn(0);
3863 }
3864 
3865 #undef __FUNCT__
3866 #define __FUNCT__ "MatMerge"
3867 /*@
3868       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3869                  matrices from each processor
3870 
3871     Collective on MPI_Comm
3872 
3873    Input Parameters:
3874 +    comm - the communicators the parallel matrix will live on
3875 .    inmat - the input sequential matrices
3876 .    n - number of local columns (or PETSC_DECIDE)
3877 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3878 
3879    Output Parameter:
3880 .    outmat - the parallel matrix generated
3881 
3882     Level: advanced
3883 
3884    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3885 
3886 @*/
3887 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3888 {
3889   PetscErrorCode ierr;
3890   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3891   PetscInt       *indx;
3892   PetscScalar    *values;
3893 
3894   PetscFunctionBegin;
3895   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3896   if (scall == MAT_INITIAL_MATRIX){
3897     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3898     if (n == PETSC_DECIDE){
3899       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3900     }
3901     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3902     rstart -= m;
3903 
3904     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3905     for (i=0;i<m;i++) {
3906       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3907       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3908       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3909     }
3910     /* This routine will ONLY return MPIAIJ type matrix */
3911     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3912     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3913     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3914     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3915     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3916 
3917   } else if (scall == MAT_REUSE_MATRIX){
3918     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3919   } else {
3920     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3921   }
3922 
3923   for (i=0;i<m;i++) {
3924     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3925     Ii    = i + rstart;
3926     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3927     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3928   }
3929   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3930   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3931   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3932 
3933   PetscFunctionReturn(0);
3934 }
3935 
3936 #undef __FUNCT__
3937 #define __FUNCT__ "MatFileSplit"
3938 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3939 {
3940   PetscErrorCode    ierr;
3941   PetscMPIInt       rank;
3942   PetscInt          m,N,i,rstart,nnz;
3943   size_t            len;
3944   const PetscInt    *indx;
3945   PetscViewer       out;
3946   char              *name;
3947   Mat               B;
3948   const PetscScalar *values;
3949 
3950   PetscFunctionBegin;
3951   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3952   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3953   /* Should this be the type of the diagonal block of A? */
3954   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3955   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3956   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3957   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3958   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3959   for (i=0;i<m;i++) {
3960     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3961     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3962     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3963   }
3964   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3965   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3966 
3967   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3968   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3969   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3970   sprintf(name,"%s.%d",outfile,rank);
3971   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3972   ierr = PetscFree(name);
3973   ierr = MatView(B,out);CHKERRQ(ierr);
3974   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3975   ierr = MatDestroy(B);CHKERRQ(ierr);
3976   PetscFunctionReturn(0);
3977 }
3978 
3979 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3980 #undef __FUNCT__
3981 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3982 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3983 {
3984   PetscErrorCode       ierr;
3985   Mat_Merge_SeqsToMPI  *merge;
3986   PetscContainer       container;
3987 
3988   PetscFunctionBegin;
3989   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3990   if (container) {
3991     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4002     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4003 
4004     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4005     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4006   }
4007   ierr = PetscFree(merge);CHKERRQ(ierr);
4008 
4009   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4010   PetscFunctionReturn(0);
4011 }
4012 
4013 #include "../src/mat/utils/freespace.h"
4014 #include "petscbt.h"
4015 
4016 #undef __FUNCT__
4017 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4018 /*@C
4019       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4020                  matrices from each processor
4021 
4022     Collective on MPI_Comm
4023 
4024    Input Parameters:
4025 +    comm - the communicators the parallel matrix will live on
4026 .    seqmat - the input sequential matrices
4027 .    m - number of local rows (or PETSC_DECIDE)
4028 .    n - number of local columns (or PETSC_DECIDE)
4029 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4030 
4031    Output Parameter:
4032 .    mpimat - the parallel matrix generated
4033 
4034     Level: advanced
4035 
4036    Notes:
4037      The dimensions of the sequential matrix in each processor MUST be the same.
4038      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4039      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4040 @*/
4041 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4042 {
4043   PetscErrorCode       ierr;
4044   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4045   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4046   PetscMPIInt          size,rank,taga,*len_s;
4047   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4048   PetscInt             proc,m;
4049   PetscInt             **buf_ri,**buf_rj;
4050   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4051   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4052   MPI_Request          *s_waits,*r_waits;
4053   MPI_Status           *status;
4054   MatScalar            *aa=a->a;
4055   MatScalar            **abuf_r,*ba_i;
4056   Mat_Merge_SeqsToMPI  *merge;
4057   PetscContainer       container;
4058 
4059   PetscFunctionBegin;
4060   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4061 
4062   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4063   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4064 
4065   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4066   if (container) {
4067     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4068   }
4069   bi     = merge->bi;
4070   bj     = merge->bj;
4071   buf_ri = merge->buf_ri;
4072   buf_rj = merge->buf_rj;
4073 
4074   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4075   owners = merge->rowmap->range;
4076   len_s  = merge->len_s;
4077 
4078   /* send and recv matrix values */
4079   /*-----------------------------*/
4080   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4081   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4082 
4083   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4084   for (proc=0,k=0; proc<size; proc++){
4085     if (!len_s[proc]) continue;
4086     i = owners[proc];
4087     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4088     k++;
4089   }
4090 
4091   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4092   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4093   ierr = PetscFree(status);CHKERRQ(ierr);
4094 
4095   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4096   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4097 
4098   /* insert mat values of mpimat */
4099   /*----------------------------*/
4100   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4101   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4102   nextrow = buf_ri_k + merge->nrecv;
4103   nextai  = nextrow + merge->nrecv;
4104 
4105   for (k=0; k<merge->nrecv; k++){
4106     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4107     nrows = *(buf_ri_k[k]);
4108     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4109     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4110   }
4111 
4112   /* set values of ba */
4113   m = merge->rowmap->n;
4114   for (i=0; i<m; i++) {
4115     arow = owners[rank] + i;
4116     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4117     bnzi = bi[i+1] - bi[i];
4118     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4119 
4120     /* add local non-zero vals of this proc's seqmat into ba */
4121     anzi = ai[arow+1] - ai[arow];
4122     aj   = a->j + ai[arow];
4123     aa   = a->a + ai[arow];
4124     nextaj = 0;
4125     for (j=0; nextaj<anzi; j++){
4126       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4127         ba_i[j] += aa[nextaj++];
4128       }
4129     }
4130 
4131     /* add received vals into ba */
4132     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4133       /* i-th row */
4134       if (i == *nextrow[k]) {
4135         anzi = *(nextai[k]+1) - *nextai[k];
4136         aj   = buf_rj[k] + *(nextai[k]);
4137         aa   = abuf_r[k] + *(nextai[k]);
4138         nextaj = 0;
4139         for (j=0; nextaj<anzi; j++){
4140           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4141             ba_i[j] += aa[nextaj++];
4142           }
4143         }
4144         nextrow[k]++; nextai[k]++;
4145       }
4146     }
4147     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4148   }
4149   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4150   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4151 
4152   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4153   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4154   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4155   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4156   PetscFunctionReturn(0);
4157 }
4158 
4159 #undef __FUNCT__
4160 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4161 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4162 {
4163   PetscErrorCode       ierr;
4164   Mat                  B_mpi;
4165   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4166   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4167   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4168   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4169   PetscInt             len,proc,*dnz,*onz;
4170   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4171   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4172   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4173   MPI_Status           *status;
4174   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4175   PetscBT              lnkbt;
4176   Mat_Merge_SeqsToMPI  *merge;
4177   PetscContainer       container;
4178 
4179   PetscFunctionBegin;
4180   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4181 
4182   /* make sure it is a PETSc comm */
4183   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4184   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4185   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4186 
4187   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4188   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4189 
4190   /* determine row ownership */
4191   /*---------------------------------------------------------*/
4192   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4193   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4194   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4195   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4196   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4197   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4198   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4199 
4200   m      = merge->rowmap->n;
4201   M      = merge->rowmap->N;
4202   owners = merge->rowmap->range;
4203 
4204   /* determine the number of messages to send, their lengths */
4205   /*---------------------------------------------------------*/
4206   len_s  = merge->len_s;
4207 
4208   len = 0;  /* length of buf_si[] */
4209   merge->nsend = 0;
4210   for (proc=0; proc<size; proc++){
4211     len_si[proc] = 0;
4212     if (proc == rank){
4213       len_s[proc] = 0;
4214     } else {
4215       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4216       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4217     }
4218     if (len_s[proc]) {
4219       merge->nsend++;
4220       nrows = 0;
4221       for (i=owners[proc]; i<owners[proc+1]; i++){
4222         if (ai[i+1] > ai[i]) nrows++;
4223       }
4224       len_si[proc] = 2*(nrows+1);
4225       len += len_si[proc];
4226     }
4227   }
4228 
4229   /* determine the number and length of messages to receive for ij-structure */
4230   /*-------------------------------------------------------------------------*/
4231   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4232   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4233 
4234   /* post the Irecv of j-structure */
4235   /*-------------------------------*/
4236   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4237   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4238 
4239   /* post the Isend of j-structure */
4240   /*--------------------------------*/
4241   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4242   sj_waits = si_waits + merge->nsend;
4243 
4244   for (proc=0, k=0; proc<size; proc++){
4245     if (!len_s[proc]) continue;
4246     i = owners[proc];
4247     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4248     k++;
4249   }
4250 
4251   /* receives and sends of j-structure are complete */
4252   /*------------------------------------------------*/
4253   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4254   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4255 
4256   /* send and recv i-structure */
4257   /*---------------------------*/
4258   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4259   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4260 
4261   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4262   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4263   for (proc=0,k=0; proc<size; proc++){
4264     if (!len_s[proc]) continue;
4265     /* form outgoing message for i-structure:
4266          buf_si[0]:                 nrows to be sent
4267                [1:nrows]:           row index (global)
4268                [nrows+1:2*nrows+1]: i-structure index
4269     */
4270     /*-------------------------------------------*/
4271     nrows = len_si[proc]/2 - 1;
4272     buf_si_i    = buf_si + nrows+1;
4273     buf_si[0]   = nrows;
4274     buf_si_i[0] = 0;
4275     nrows = 0;
4276     for (i=owners[proc]; i<owners[proc+1]; i++){
4277       anzi = ai[i+1] - ai[i];
4278       if (anzi) {
4279         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4280         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4281         nrows++;
4282       }
4283     }
4284     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4285     k++;
4286     buf_si += len_si[proc];
4287   }
4288 
4289   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4290   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4291 
4292   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4293   for (i=0; i<merge->nrecv; i++){
4294     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4295   }
4296 
4297   ierr = PetscFree(len_si);CHKERRQ(ierr);
4298   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4299   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4300   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4301   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4302   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4303   ierr = PetscFree(status);CHKERRQ(ierr);
4304 
4305   /* compute a local seq matrix in each processor */
4306   /*----------------------------------------------*/
4307   /* allocate bi array and free space for accumulating nonzero column info */
4308   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4309   bi[0] = 0;
4310 
4311   /* create and initialize a linked list */
4312   nlnk = N+1;
4313   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4314 
4315   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4316   len = 0;
4317   len  = ai[owners[rank+1]] - ai[owners[rank]];
4318   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4319   current_space = free_space;
4320 
4321   /* determine symbolic info for each local row */
4322   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4323   nextrow = buf_ri_k + merge->nrecv;
4324   nextai  = nextrow + merge->nrecv;
4325   for (k=0; k<merge->nrecv; k++){
4326     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4327     nrows = *buf_ri_k[k];
4328     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4329     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4330   }
4331 
4332   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4333   len = 0;
4334   for (i=0;i<m;i++) {
4335     bnzi   = 0;
4336     /* add local non-zero cols of this proc's seqmat into lnk */
4337     arow   = owners[rank] + i;
4338     anzi   = ai[arow+1] - ai[arow];
4339     aj     = a->j + ai[arow];
4340     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4341     bnzi += nlnk;
4342     /* add received col data into lnk */
4343     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4344       if (i == *nextrow[k]) { /* i-th row */
4345         anzi = *(nextai[k]+1) - *nextai[k];
4346         aj   = buf_rj[k] + *nextai[k];
4347         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4348         bnzi += nlnk;
4349         nextrow[k]++; nextai[k]++;
4350       }
4351     }
4352     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4353 
4354     /* if free space is not available, make more free space */
4355     if (current_space->local_remaining<bnzi) {
4356       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4357       nspacedouble++;
4358     }
4359     /* copy data into free space, then initialize lnk */
4360     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4361     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4362 
4363     current_space->array           += bnzi;
4364     current_space->local_used      += bnzi;
4365     current_space->local_remaining -= bnzi;
4366 
4367     bi[i+1] = bi[i] + bnzi;
4368   }
4369 
4370   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4371 
4372   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4373   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4374   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4375 
4376   /* create symbolic parallel matrix B_mpi */
4377   /*---------------------------------------*/
4378   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4379   if (n==PETSC_DECIDE) {
4380     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4381   } else {
4382     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4383   }
4384   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4385   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4386   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4387 
4388   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4389   B_mpi->assembled     = PETSC_FALSE;
4390   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4391   merge->bi            = bi;
4392   merge->bj            = bj;
4393   merge->buf_ri        = buf_ri;
4394   merge->buf_rj        = buf_rj;
4395   merge->coi           = PETSC_NULL;
4396   merge->coj           = PETSC_NULL;
4397   merge->owners_co     = PETSC_NULL;
4398 
4399   /* attach the supporting struct to B_mpi for reuse */
4400   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4401   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4402   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4403   *mpimat = B_mpi;
4404 
4405   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4406   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4407   PetscFunctionReturn(0);
4408 }
4409 
4410 #undef __FUNCT__
4411 #define __FUNCT__ "MatMerge_SeqsToMPI"
4412 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4413 {
4414   PetscErrorCode   ierr;
4415 
4416   PetscFunctionBegin;
4417   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4418   if (scall == MAT_INITIAL_MATRIX){
4419     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4420   }
4421   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4422   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4423   PetscFunctionReturn(0);
4424 }
4425 
4426 #undef __FUNCT__
4427 #define __FUNCT__ "MatGetLocalMat"
4428 /*@
4429      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4430 
4431     Not Collective
4432 
4433    Input Parameters:
4434 +    A - the matrix
4435 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4436 
4437    Output Parameter:
4438 .    A_loc - the local sequential matrix generated
4439 
4440     Level: developer
4441 
4442 @*/
4443 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4444 {
4445   PetscErrorCode  ierr;
4446   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4447   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4448   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4449   MatScalar       *aa=a->a,*ba=b->a,*cam;
4450   PetscScalar     *ca;
4451   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4452   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4453 
4454   PetscFunctionBegin;
4455   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4456   if (scall == MAT_INITIAL_MATRIX){
4457     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4458     ci[0] = 0;
4459     for (i=0; i<am; i++){
4460       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4461     }
4462     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4463     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4464     k = 0;
4465     for (i=0; i<am; i++) {
4466       ncols_o = bi[i+1] - bi[i];
4467       ncols_d = ai[i+1] - ai[i];
4468       /* off-diagonal portion of A */
4469       for (jo=0; jo<ncols_o; jo++) {
4470         col = cmap[*bj];
4471         if (col >= cstart) break;
4472         cj[k]   = col; bj++;
4473         ca[k++] = *ba++;
4474       }
4475       /* diagonal portion of A */
4476       for (j=0; j<ncols_d; j++) {
4477         cj[k]   = cstart + *aj++;
4478         ca[k++] = *aa++;
4479       }
4480       /* off-diagonal portion of A */
4481       for (j=jo; j<ncols_o; j++) {
4482         cj[k]   = cmap[*bj++];
4483         ca[k++] = *ba++;
4484       }
4485     }
4486     /* put together the new matrix */
4487     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4488     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4489     /* Since these are PETSc arrays, change flags to free them as necessary. */
4490     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4491     mat->free_a  = PETSC_TRUE;
4492     mat->free_ij = PETSC_TRUE;
4493     mat->nonew   = 0;
4494   } else if (scall == MAT_REUSE_MATRIX){
4495     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4496     ci = mat->i; cj = mat->j; cam = mat->a;
4497     for (i=0; i<am; i++) {
4498       /* off-diagonal portion of A */
4499       ncols_o = bi[i+1] - bi[i];
4500       for (jo=0; jo<ncols_o; jo++) {
4501         col = cmap[*bj];
4502         if (col >= cstart) break;
4503         *cam++ = *ba++; bj++;
4504       }
4505       /* diagonal portion of A */
4506       ncols_d = ai[i+1] - ai[i];
4507       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4508       /* off-diagonal portion of A */
4509       for (j=jo; j<ncols_o; j++) {
4510         *cam++ = *ba++; bj++;
4511       }
4512     }
4513   } else {
4514     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4515   }
4516 
4517   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4518   PetscFunctionReturn(0);
4519 }
4520 
4521 #undef __FUNCT__
4522 #define __FUNCT__ "MatGetLocalMatCondensed"
4523 /*@C
4524      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4525 
4526     Not Collective
4527 
4528    Input Parameters:
4529 +    A - the matrix
4530 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4531 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4532 
4533    Output Parameter:
4534 .    A_loc - the local sequential matrix generated
4535 
4536     Level: developer
4537 
4538 @*/
4539 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4540 {
4541   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4542   PetscErrorCode    ierr;
4543   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4544   IS                isrowa,iscola;
4545   Mat               *aloc;
4546 
4547   PetscFunctionBegin;
4548   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4549   if (!row){
4550     start = A->rmap->rstart; end = A->rmap->rend;
4551     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4552   } else {
4553     isrowa = *row;
4554   }
4555   if (!col){
4556     start = A->cmap->rstart;
4557     cmap  = a->garray;
4558     nzA   = a->A->cmap->n;
4559     nzB   = a->B->cmap->n;
4560     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4561     ncols = 0;
4562     for (i=0; i<nzB; i++) {
4563       if (cmap[i] < start) idx[ncols++] = cmap[i];
4564       else break;
4565     }
4566     imark = i;
4567     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4568     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4569     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4570     ierr = PetscFree(idx);CHKERRQ(ierr);
4571   } else {
4572     iscola = *col;
4573   }
4574   if (scall != MAT_INITIAL_MATRIX){
4575     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4576     aloc[0] = *A_loc;
4577   }
4578   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4579   *A_loc = aloc[0];
4580   ierr = PetscFree(aloc);CHKERRQ(ierr);
4581   if (!row){
4582     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4583   }
4584   if (!col){
4585     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4586   }
4587   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4588   PetscFunctionReturn(0);
4589 }
4590 
4591 #undef __FUNCT__
4592 #define __FUNCT__ "MatGetBrowsOfAcols"
4593 /*@C
4594     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4595 
4596     Collective on Mat
4597 
4598    Input Parameters:
4599 +    A,B - the matrices in mpiaij format
4600 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4601 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4602 
4603    Output Parameter:
4604 +    rowb, colb - index sets of rows and columns of B to extract
4605 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4606 -    B_seq - the sequential matrix generated
4607 
4608     Level: developer
4609 
4610 @*/
4611 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4612 {
4613   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4614   PetscErrorCode    ierr;
4615   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4616   IS                isrowb,iscolb;
4617   Mat               *bseq;
4618 
4619   PetscFunctionBegin;
4620   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4621     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4622   }
4623   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4624 
4625   if (scall == MAT_INITIAL_MATRIX){
4626     start = A->cmap->rstart;
4627     cmap  = a->garray;
4628     nzA   = a->A->cmap->n;
4629     nzB   = a->B->cmap->n;
4630     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4631     ncols = 0;
4632     for (i=0; i<nzB; i++) {  /* row < local row index */
4633       if (cmap[i] < start) idx[ncols++] = cmap[i];
4634       else break;
4635     }
4636     imark = i;
4637     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4638     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4639     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4640     ierr = PetscFree(idx);CHKERRQ(ierr);
4641     *brstart = imark;
4642     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4643   } else {
4644     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4645     isrowb = *rowb; iscolb = *colb;
4646     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4647     bseq[0] = *B_seq;
4648   }
4649   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4650   *B_seq = bseq[0];
4651   ierr = PetscFree(bseq);CHKERRQ(ierr);
4652   if (!rowb){
4653     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4654   } else {
4655     *rowb = isrowb;
4656   }
4657   if (!colb){
4658     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4659   } else {
4660     *colb = iscolb;
4661   }
4662   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4663   PetscFunctionReturn(0);
4664 }
4665 
4666 #undef __FUNCT__
4667 #define __FUNCT__ "MatGetBrowsOfAoCols"
4668 /*@C
4669     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4670     of the OFF-DIAGONAL portion of local A
4671 
4672     Collective on Mat
4673 
4674    Input Parameters:
4675 +    A,B - the matrices in mpiaij format
4676 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4677 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4678 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4679 
4680    Output Parameter:
4681 +    B_oth - the sequential matrix generated
4682 
4683     Level: developer
4684 
4685 @*/
4686 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4687 {
4688   VecScatter_MPI_General *gen_to,*gen_from;
4689   PetscErrorCode         ierr;
4690   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4691   Mat_SeqAIJ             *b_oth;
4692   VecScatter             ctx=a->Mvctx;
4693   MPI_Comm               comm=((PetscObject)ctx)->comm;
4694   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4695   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4696   PetscScalar            *rvalues,*svalues;
4697   MatScalar              *b_otha,*bufa,*bufA;
4698   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4699   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4700   MPI_Status             *sstatus,rstatus;
4701   PetscMPIInt            jj;
4702   PetscInt               *cols,sbs,rbs;
4703   PetscScalar            *vals;
4704 
4705   PetscFunctionBegin;
4706   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4707     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4708   }
4709   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4710   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4711 
4712   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4713   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4714   rvalues  = gen_from->values; /* holds the length of receiving row */
4715   svalues  = gen_to->values;   /* holds the length of sending row */
4716   nrecvs   = gen_from->n;
4717   nsends   = gen_to->n;
4718 
4719   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4720   srow     = gen_to->indices;   /* local row index to be sent */
4721   sstarts  = gen_to->starts;
4722   sprocs   = gen_to->procs;
4723   sstatus  = gen_to->sstatus;
4724   sbs      = gen_to->bs;
4725   rstarts  = gen_from->starts;
4726   rprocs   = gen_from->procs;
4727   rbs      = gen_from->bs;
4728 
4729   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4730   if (scall == MAT_INITIAL_MATRIX){
4731     /* i-array */
4732     /*---------*/
4733     /*  post receives */
4734     for (i=0; i<nrecvs; i++){
4735       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4736       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4737       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4738     }
4739 
4740     /* pack the outgoing message */
4741     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4742     rstartsj = sstartsj + nsends +1;
4743     sstartsj[0] = 0;  rstartsj[0] = 0;
4744     len = 0; /* total length of j or a array to be sent */
4745     k = 0;
4746     for (i=0; i<nsends; i++){
4747       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4748       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4749       for (j=0; j<nrows; j++) {
4750         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4751         for (l=0; l<sbs; l++){
4752           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4753           rowlen[j*sbs+l] = ncols;
4754           len += ncols;
4755           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4756         }
4757         k++;
4758       }
4759       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4760       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4761     }
4762     /* recvs and sends of i-array are completed */
4763     i = nrecvs;
4764     while (i--) {
4765       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4766     }
4767     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4768 
4769     /* allocate buffers for sending j and a arrays */
4770     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4771     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4772 
4773     /* create i-array of B_oth */
4774     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4775     b_othi[0] = 0;
4776     len = 0; /* total length of j or a array to be received */
4777     k = 0;
4778     for (i=0; i<nrecvs; i++){
4779       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4780       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4781       for (j=0; j<nrows; j++) {
4782         b_othi[k+1] = b_othi[k] + rowlen[j];
4783         len += rowlen[j]; k++;
4784       }
4785       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4786     }
4787 
4788     /* allocate space for j and a arrrays of B_oth */
4789     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4790     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4791 
4792     /* j-array */
4793     /*---------*/
4794     /*  post receives of j-array */
4795     for (i=0; i<nrecvs; i++){
4796       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4797       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4798     }
4799 
4800     /* pack the outgoing message j-array */
4801     k = 0;
4802     for (i=0; i<nsends; i++){
4803       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4804       bufJ = bufj+sstartsj[i];
4805       for (j=0; j<nrows; j++) {
4806         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4807         for (ll=0; ll<sbs; ll++){
4808           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4809           for (l=0; l<ncols; l++){
4810             *bufJ++ = cols[l];
4811           }
4812           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4813         }
4814       }
4815       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4816     }
4817 
4818     /* recvs and sends of j-array are completed */
4819     i = nrecvs;
4820     while (i--) {
4821       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4822     }
4823     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4824   } else if (scall == MAT_REUSE_MATRIX){
4825     sstartsj = *startsj;
4826     rstartsj = sstartsj + nsends +1;
4827     bufa     = *bufa_ptr;
4828     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4829     b_otha   = b_oth->a;
4830   } else {
4831     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4832   }
4833 
4834   /* a-array */
4835   /*---------*/
4836   /*  post receives of a-array */
4837   for (i=0; i<nrecvs; i++){
4838     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4839     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4840   }
4841 
4842   /* pack the outgoing message a-array */
4843   k = 0;
4844   for (i=0; i<nsends; i++){
4845     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4846     bufA = bufa+sstartsj[i];
4847     for (j=0; j<nrows; j++) {
4848       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4849       for (ll=0; ll<sbs; ll++){
4850         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4851         for (l=0; l<ncols; l++){
4852           *bufA++ = vals[l];
4853         }
4854         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4855       }
4856     }
4857     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4858   }
4859   /* recvs and sends of a-array are completed */
4860   i = nrecvs;
4861   while (i--) {
4862     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4863   }
4864   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4865   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4866 
4867   if (scall == MAT_INITIAL_MATRIX){
4868     /* put together the new matrix */
4869     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4870 
4871     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4872     /* Since these are PETSc arrays, change flags to free them as necessary. */
4873     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4874     b_oth->free_a  = PETSC_TRUE;
4875     b_oth->free_ij = PETSC_TRUE;
4876     b_oth->nonew   = 0;
4877 
4878     ierr = PetscFree(bufj);CHKERRQ(ierr);
4879     if (!startsj || !bufa_ptr){
4880       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4881       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4882     } else {
4883       *startsj  = sstartsj;
4884       *bufa_ptr = bufa;
4885     }
4886   }
4887   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4888   PetscFunctionReturn(0);
4889 }
4890 
4891 #undef __FUNCT__
4892 #define __FUNCT__ "MatGetCommunicationStructs"
4893 /*@C
4894   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4895 
4896   Not Collective
4897 
4898   Input Parameters:
4899 . A - The matrix in mpiaij format
4900 
4901   Output Parameter:
4902 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4903 . colmap - A map from global column index to local index into lvec
4904 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4905 
4906   Level: developer
4907 
4908 @*/
4909 #if defined (PETSC_USE_CTABLE)
4910 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4911 #else
4912 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4913 #endif
4914 {
4915   Mat_MPIAIJ *a;
4916 
4917   PetscFunctionBegin;
4918   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4919   PetscValidPointer(lvec, 2)
4920   PetscValidPointer(colmap, 3)
4921   PetscValidPointer(multScatter, 4)
4922   a = (Mat_MPIAIJ *) A->data;
4923   if (lvec) *lvec = a->lvec;
4924   if (colmap) *colmap = a->colmap;
4925   if (multScatter) *multScatter = a->Mvctx;
4926   PetscFunctionReturn(0);
4927 }
4928 
4929 EXTERN_C_BEGIN
4930 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4931 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4932 EXTERN_C_END
4933 
4934 #undef __FUNCT__
4935 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4936 /*
4937     Computes (B'*A')' since computing B*A directly is untenable
4938 
4939                n                       p                          p
4940         (              )       (              )         (                  )
4941       m (      A       )  *  n (       B      )   =   m (         C        )
4942         (              )       (              )         (                  )
4943 
4944 */
4945 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4946 {
4947   PetscErrorCode     ierr;
4948   Mat                At,Bt,Ct;
4949 
4950   PetscFunctionBegin;
4951   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4952   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4953   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4954   ierr = MatDestroy(At);CHKERRQ(ierr);
4955   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4956   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4957   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4958   PetscFunctionReturn(0);
4959 }
4960 
4961 #undef __FUNCT__
4962 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4963 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4964 {
4965   PetscErrorCode ierr;
4966   PetscInt       m=A->rmap->n,n=B->cmap->n;
4967   Mat            Cmat;
4968 
4969   PetscFunctionBegin;
4970   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4971   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4972   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4973   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4974   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4975   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4976   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4977   *C   = Cmat;
4978   PetscFunctionReturn(0);
4979 }
4980 
4981 /* ----------------------------------------------------------------*/
4982 #undef __FUNCT__
4983 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4984 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4985 {
4986   PetscErrorCode ierr;
4987 
4988   PetscFunctionBegin;
4989   if (scall == MAT_INITIAL_MATRIX){
4990     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4991   }
4992   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4993   PetscFunctionReturn(0);
4994 }
4995 
4996 EXTERN_C_BEGIN
4997 #if defined(PETSC_HAVE_MUMPS)
4998 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4999 #endif
5000 #if defined(PETSC_HAVE_PASTIX)
5001 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5002 #endif
5003 #if defined(PETSC_HAVE_SUPERLU_DIST)
5004 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5005 #endif
5006 #if defined(PETSC_HAVE_SPOOLES)
5007 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5008 #endif
5009 EXTERN_C_END
5010 
5011 /*MC
5012    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5013 
5014    Options Database Keys:
5015 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5016 
5017   Level: beginner
5018 
5019 .seealso: MatCreateMPIAIJ()
5020 M*/
5021 
5022 EXTERN_C_BEGIN
5023 #undef __FUNCT__
5024 #define __FUNCT__ "MatCreate_MPIAIJ"
5025 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5026 {
5027   Mat_MPIAIJ     *b;
5028   PetscErrorCode ierr;
5029   PetscMPIInt    size;
5030 
5031   PetscFunctionBegin;
5032   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5033 
5034   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5035   B->data         = (void*)b;
5036   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5037   B->rmap->bs     = 1;
5038   B->assembled    = PETSC_FALSE;
5039   B->mapping      = 0;
5040 
5041   B->insertmode   = NOT_SET_VALUES;
5042   b->size         = size;
5043   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5044 
5045   /* build cache for off array entries formed */
5046   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5047   b->donotstash  = PETSC_FALSE;
5048   b->colmap      = 0;
5049   b->garray      = 0;
5050   b->roworiented = PETSC_TRUE;
5051 
5052   /* stuff used for matrix vector multiply */
5053   b->lvec      = PETSC_NULL;
5054   b->Mvctx     = PETSC_NULL;
5055 
5056   /* stuff for MatGetRow() */
5057   b->rowindices   = 0;
5058   b->rowvalues    = 0;
5059   b->getrowactive = PETSC_FALSE;
5060 
5061 #if defined(PETSC_HAVE_SPOOLES)
5062   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5063                                      "MatGetFactor_mpiaij_spooles",
5064                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5065 #endif
5066 #if defined(PETSC_HAVE_MUMPS)
5067   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5068                                      "MatGetFactor_mpiaij_mumps",
5069                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5070 #endif
5071 #if defined(PETSC_HAVE_PASTIX)
5072   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5073 					   "MatGetFactor_mpiaij_pastix",
5074 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5075 #endif
5076 #if defined(PETSC_HAVE_SUPERLU_DIST)
5077   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5078                                      "MatGetFactor_mpiaij_superlu_dist",
5079                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5080 #endif
5081   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5082                                      "MatStoreValues_MPIAIJ",
5083                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5084   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5085                                      "MatRetrieveValues_MPIAIJ",
5086                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5087   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5088 				     "MatGetDiagonalBlock_MPIAIJ",
5089                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5090   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5091 				     "MatIsTranspose_MPIAIJ",
5092 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5093   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5094 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5095 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5096   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5097 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5098 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5099   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5100 				     "MatDiagonalScaleLocal_MPIAIJ",
5101 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5102   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5103                                      "MatConvert_MPIAIJ_MPICSRPERM",
5104                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5105   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5106                                      "MatConvert_MPIAIJ_MPICRL",
5107                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5108   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5109                                      "MatConvert_MPIAIJ_MPISBAIJ",
5110                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5111   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5112                                      "MatMatMult_MPIDense_MPIAIJ",
5113                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5114   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5115                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5116                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5117   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5118                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5119                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5120   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5121   PetscFunctionReturn(0);
5122 }
5123 EXTERN_C_END
5124 
5125 #undef __FUNCT__
5126 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5127 /*@
5128      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5129          and "off-diagonal" part of the matrix in CSR format.
5130 
5131    Collective on MPI_Comm
5132 
5133    Input Parameters:
5134 +  comm - MPI communicator
5135 .  m - number of local rows (Cannot be PETSC_DECIDE)
5136 .  n - This value should be the same as the local size used in creating the
5137        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5138        calculated if N is given) For square matrices n is almost always m.
5139 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5140 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5141 .   i - row indices for "diagonal" portion of matrix
5142 .   j - column indices
5143 .   a - matrix values
5144 .   oi - row indices for "off-diagonal" portion of matrix
5145 .   oj - column indices
5146 -   oa - matrix values
5147 
5148    Output Parameter:
5149 .   mat - the matrix
5150 
5151    Level: advanced
5152 
5153    Notes:
5154        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5155 
5156        The i and j indices are 0 based
5157 
5158        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5159 
5160        This sets local rows and cannot be used to set off-processor values.
5161 
5162        You cannot later use MatSetValues() to change values in this matrix.
5163 
5164 .keywords: matrix, aij, compressed row, sparse, parallel
5165 
5166 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5167           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5168 @*/
5169 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5170 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5171 {
5172   PetscErrorCode ierr;
5173   Mat_MPIAIJ     *maij;
5174 
5175  PetscFunctionBegin;
5176   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5177   if (i[0]) {
5178     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5179   }
5180   if (oi[0]) {
5181     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5182   }
5183   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5184   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5185   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5186   maij = (Mat_MPIAIJ*) (*mat)->data;
5187   maij->donotstash     = PETSC_TRUE;
5188   (*mat)->preallocated = PETSC_TRUE;
5189 
5190   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5191   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5192   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5193   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5194 
5195   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5196   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5197 
5198   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5199   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5200   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5201   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5202 
5203   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5204   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5205   PetscFunctionReturn(0);
5206 }
5207 
5208 /*
5209     Special version for direct calls from Fortran
5210 */
5211 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5212 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5213 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5214 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5215 #endif
5216 
5217 /* Change these macros so can be used in void function */
5218 #undef CHKERRQ
5219 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5220 #undef SETERRQ2
5221 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5222 #undef SETERRQ
5223 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5224 
5225 EXTERN_C_BEGIN
5226 #undef __FUNCT__
5227 #define __FUNCT__ "matsetvaluesmpiaij_"
5228 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5229 {
5230   Mat             mat = *mmat;
5231   PetscInt        m = *mm, n = *mn;
5232   InsertMode      addv = *maddv;
5233   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5234   PetscScalar     value;
5235   PetscErrorCode  ierr;
5236 
5237   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5238   if (mat->insertmode == NOT_SET_VALUES) {
5239     mat->insertmode = addv;
5240   }
5241 #if defined(PETSC_USE_DEBUG)
5242   else if (mat->insertmode != addv) {
5243     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5244   }
5245 #endif
5246   {
5247   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5248   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5249   PetscTruth      roworiented = aij->roworiented;
5250 
5251   /* Some Variables required in the macro */
5252   Mat             A = aij->A;
5253   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5254   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5255   MatScalar       *aa = a->a;
5256   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5257   Mat             B = aij->B;
5258   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5259   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5260   MatScalar       *ba = b->a;
5261 
5262   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5263   PetscInt        nonew = a->nonew;
5264   MatScalar       *ap1,*ap2;
5265 
5266   PetscFunctionBegin;
5267   for (i=0; i<m; i++) {
5268     if (im[i] < 0) continue;
5269 #if defined(PETSC_USE_DEBUG)
5270     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5271 #endif
5272     if (im[i] >= rstart && im[i] < rend) {
5273       row      = im[i] - rstart;
5274       lastcol1 = -1;
5275       rp1      = aj + ai[row];
5276       ap1      = aa + ai[row];
5277       rmax1    = aimax[row];
5278       nrow1    = ailen[row];
5279       low1     = 0;
5280       high1    = nrow1;
5281       lastcol2 = -1;
5282       rp2      = bj + bi[row];
5283       ap2      = ba + bi[row];
5284       rmax2    = bimax[row];
5285       nrow2    = bilen[row];
5286       low2     = 0;
5287       high2    = nrow2;
5288 
5289       for (j=0; j<n; j++) {
5290         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5291         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5292         if (in[j] >= cstart && in[j] < cend){
5293           col = in[j] - cstart;
5294           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5295         } else if (in[j] < 0) continue;
5296 #if defined(PETSC_USE_DEBUG)
5297         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5298 #endif
5299         else {
5300           if (mat->was_assembled) {
5301             if (!aij->colmap) {
5302               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5303             }
5304 #if defined (PETSC_USE_CTABLE)
5305             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5306 	    col--;
5307 #else
5308             col = aij->colmap[in[j]] - 1;
5309 #endif
5310             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5311               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5312               col =  in[j];
5313               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5314               B = aij->B;
5315               b = (Mat_SeqAIJ*)B->data;
5316               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5317               rp2      = bj + bi[row];
5318               ap2      = ba + bi[row];
5319               rmax2    = bimax[row];
5320               nrow2    = bilen[row];
5321               low2     = 0;
5322               high2    = nrow2;
5323               bm       = aij->B->rmap->n;
5324               ba = b->a;
5325             }
5326           } else col = in[j];
5327           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5328         }
5329       }
5330     } else {
5331       if (!aij->donotstash) {
5332         if (roworiented) {
5333           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5334         } else {
5335           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5336         }
5337       }
5338     }
5339   }}
5340   PetscFunctionReturnVoid();
5341 }
5342 EXTERN_C_END
5343 
5344