xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 28adb3f739167aae2a3fdf9bf501dbd0150de1de)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatDistribute_MPIAIJ"
7 /*
8     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
9     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
10 
11     Only supports square matrices.
12 */
13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
14 {
15   PetscMPIInt    rank,size;
16   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
17   PetscErrorCode ierr;
18   Mat            mat;
19   Mat_SeqAIJ     *gmata;
20   PetscMPIInt    tag;
21   MPI_Status     status;
22   PetscTruth     aij;
23   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
24 
25   PetscFunctionBegin;
26   CHKMEMQ;
27   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
28   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
29   if (!rank) {
30     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
31     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
32   }
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
35     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
36     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
37     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
38     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
39     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
40     rowners[0] = 0;
41     for (i=2; i<=size; i++) {
42       rowners[i] += rowners[i-1];
43     }
44     rstart = rowners[rank];
45     rend   = rowners[rank+1];
46     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
47     if (!rank) {
48       gmata = (Mat_SeqAIJ*) gmat->data;
49       /* send row lengths to all processors */
50       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
51       for (i=1; i<size; i++) {
52         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
53       }
54       /* determine the number of diagonal and off-diagonal entries in each row */
55       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
56       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
57       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
58       jj = 0;
59       for (i=0; i<m; i++) {
60         for (j=0; j<dlens[i]; j++) {
61           if (gmata->j[jj] < rstart) ld[i]++;
62           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
63           jj++;
64         }
65       }
66       /* send column indices to other processes */
67       for (i=1; i<size; i++) {
68         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
69         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
70         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71       }
72 
73       /* send numerical values to other processes */
74       for (i=1; i<size; i++) {
75         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
76         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
77       }
78       gmataa = gmata->a;
79       gmataj = gmata->j;
80 
81     } else {
82       /* receive row lengths */
83       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
84       /* receive column indices */
85       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
86       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
87       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
88       /* determine the number of diagonal and off-diagonal entries in each row */
89       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
90       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
91       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
92       jj = 0;
93       for (i=0; i<m; i++) {
94         for (j=0; j<dlens[i]; j++) {
95           if (gmataj[jj] < rstart) ld[i]++;
96           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
97           jj++;
98         }
99       }
100       /* receive numerical values */
101       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
103     }
104     /* set preallocation */
105     for (i=0; i<m; i++) {
106       dlens[i] -= olens[i];
107     }
108     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
109     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
110 
111     for (i=0; i<m; i++) {
112       dlens[i] += olens[i];
113     }
114     cnt  = 0;
115     for (i=0; i<m; i++) {
116       row  = rstart + i;
117       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
118       cnt += dlens[i];
119     }
120     if (rank) {
121       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
122     }
123     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
124     ierr = PetscFree(rowners);CHKERRQ(ierr);
125     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
126     *inmat = mat;
127   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
128     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
129     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
130     mat   = *inmat;
131     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
132     if (!rank) {
133       /* send numerical values to other processes */
134       gmata = (Mat_SeqAIJ*) gmat->data;
135       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
136       gmataa = gmata->a;
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
142     } else {
143       /* receive numerical values from process 0 */
144       nz   = Ad->nz + Ao->nz;
145       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
146       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
147     }
148     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
149     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
150     ad = Ad->a;
151     ao = Ao->a;
152     if (mat->rmap->n) {
153       i  = 0;
154       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
155       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
156     }
157     for (i=1; i<mat->rmap->n; i++) {
158       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
159       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
160     }
161     i--;
162     if (mat->rmap->n) {
163       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
164     }
165     if (rank) {
166       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
167     }
168   }
169   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   CHKMEMQ;
172   PetscFunctionReturn(0);
173 }
174 
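/*
   A minimal usage sketch (hypothetical driver code, not part of this file):
   rank 0 builds a SeqAIJ matrix and MatDistribute_MPIAIJ() spreads it across
   the communicator; the size n and the preallocation are illustrative only.

     Mat            gmat = PETSC_NULL,dmat;
     PetscMPIInt    rank,size;
     PetscInt       n = 100,m;
     PetscErrorCode ierr;

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
     if (!rank) {                                   // only the root holds the global matrix
       ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,n,n,5,PETSC_NULL,&gmat);CHKERRQ(ierr);
       // ... fill gmat with MatSetValues() and assemble it ...
     }
     m    = n/size + ((n % size) > rank ? 1 : 0);   // local rows on this process
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
*/
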
175 /*
176   Local utility routine that creates a mapping from the global column
177 number to the local number in the off-diagonal part of the local
178 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable
179 at a slightly higher hash-table cost; without it, it is not scalable (each
180 process stores an order-N integer array) but access is fast.
181 */
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
204 
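/*
   A small sketch of how the colmap built above is consulted later in this
   file; the garray contents here are made up. colmap stores local+1 so that
   zero can mean "global column not present in B".

     PetscInt gcol = 7,lcol;                 // suppose garray = {3,7,12}
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                                 // lcol is now 1, or -1 if absent
   #else
     lcol = aij->colmap[gcol] - 1;           // same convention without a table
   #endif
*/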
205 
206 #define CHUNKSIZE   15
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
242 { \
243     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
244     lastcol2 = col;\
245     while (high2-low2 > 5) { \
246       t = (low2+high2)/2; \
247       if (rp2[t] > col) high2 = t; \
248       else             low2  = t; \
249     } \
250     for (_i=low2; _i<high2; _i++) { \
251       if (rp2[_i] > col) break; \
252       if (rp2[_i] == col) { \
253         if (addv == ADD_VALUES) ap2[_i] += value; \
254         else                    ap2[_i] = value; \
255         goto b_noinsert; \
256       } \
257     } \
258     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
259     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
261     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
262     N = nrow2++ - 1; b->nz++; high2++; \
263     /* shift up all the later entries in this row */ \
264     for (ii=N; ii>=_i; ii--) { \
265       rp2[ii+1] = rp2[ii]; \
266       ap2[ii+1] = ap2[ii]; \
267     } \
268     rp2[_i] = col; \
269     ap2[_i] = value; \
270     b_noinsert: ; \
271     bilen[row] = nrow2; \
272 }
273 
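/*
   The two macros above share one pattern: narrow [low,high) by bisection,
   scan for the column, then either update in place or shift the tail of the
   row up by one slot and insert. A simplified standalone sketch of that
   pattern (reallocation and the nonew/ignorezeroentries cases omitted):

     static void InsertSorted(PetscInt *cols,MatScalar *vals,PetscInt *nrow,
                              PetscInt col,MatScalar value,InsertMode addv)
     {
       PetscInt low = 0,high = *nrow,t,i;
       while (high - low > 5) {              // bisect down to a short run
         t = (low + high)/2;
         if (cols[t] > col) high = t;
         else               low  = t;
       }
       for (i=low; i<high; i++) {
         if (cols[i] > col) break;           // insertion point found
         if (cols[i] == col) {               // column already present
           if (addv == ADD_VALUES) vals[i] += value;
           else                    vals[i]  = value;
           return;
         }
       }
       for (t=*nrow; t>i; t--) {cols[t] = cols[t-1]; vals[t] = vals[t-1];}
       cols[i] = col; vals[i] = value; (*nrow)++;
     }
*/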
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
285 
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
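/*
   Layout assumed by MatSetValuesRow_MPIAIJ() for the input array v: the full
   local row in global column order, which the three PetscMemcpy() calls
   above split as

     [ B entries left of the diagonal block | A entries | B entries right ]
               l values                       diagonal     remaining values

   For example (made-up sizes), with l = 2 and three entries in the diagonal
   block, v[0..1] go into b->a, v[2..4] into a->a, and v[5..] back into b->a.
*/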
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIAIJ"
304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscScalar    value;
308   PetscErrorCode ierr;
309   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
310   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
311   PetscTruth     roworiented = aij->roworiented;
312 
313   /* Some variables required by the macros above */
314   Mat            A = aij->A;
315   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
316   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
317   MatScalar      *aa = a->a;
318   PetscTruth     ignorezeroentries = a->ignorezeroentries;
319   Mat            B = aij->B;
320   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
321   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
322   MatScalar      *ba = b->a;
323 
324   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
325   PetscInt       nonew = a->nonew;
326   MatScalar      *ap1,*ap2;
327 
328   PetscFunctionBegin;
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368             col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (!aij->donotstash) {
394         if (roworiented) {
395           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
396         } else {
397           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         }
399       }
400     }
401   }
402   PetscFunctionReturn(0);
403 }
404 
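/*
   Usage sketch (standard public API; indices and values are illustrative):
   rows owned locally are inserted directly into A or B by the code above,
   while rows owned elsewhere are stashed and exchanged during assembly.

     PetscInt    row = 42,cols[3] = {41,42,43};
     PetscScalar vals[3] = {-1.0,2.0,-1.0};
     ierr = MatSetValues(mat,1,&row,3,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/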
405 #undef __FUNCT__
406 #define __FUNCT__ "MatGetValues_MPIAIJ"
407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
408 {
409   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
410   PetscErrorCode ierr;
411   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
412   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
413 
414   PetscFunctionBegin;
415   for (i=0; i<m; i++) {
416     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
417     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
418     if (idxm[i] >= rstart && idxm[i] < rend) {
419       row = idxm[i] - rstart;
420       for (j=0; j<n; j++) {
421         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
422         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
423         if (idxn[j] >= cstart && idxn[j] < cend){
424           col = idxn[j] - cstart;
425           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
426         } else {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
429           }
430 #if defined (PETSC_USE_CTABLE)
431           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
432           col--;
433 #else
434           col = aij->colmap[idxn[j]] - 1;
435 #endif
436           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
437           else {
438             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
439           }
440         }
441       }
442     } else {
443       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
444     }
445   }
446   PetscFunctionReturn(0);
447 }
448 
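/*
   Note the restriction enforced above: MatGetValues() on an MPIAIJ matrix
   can only retrieve rows the calling process owns. A safe calling pattern
   (public API; the column index is illustrative):

     PetscInt    rstart,rend,row,col = 0;
     PetscScalar v;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart;                          // any row in [rstart,rend) works
     ierr = MatGetValues(mat,1,&row,1,&col,&v);CHKERRQ(ierr);
*/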
449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscErrorCode ierr;
457   PetscInt       nstash,reallocs;
458   InsertMode     addv;
459 
460   PetscFunctionBegin;
461   if (aij->donotstash) {
462     PetscFunctionReturn(0);
463   }
464 
465   /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
466   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
467   if (addv == (ADD_VALUES|INSERT_VALUES)) {
468     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   }
470   mat->insertmode = addv; /* in case this processor had no cache */
471 
472   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
473   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
474   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
484   PetscErrorCode ierr;
485   PetscMPIInt    n;
486   PetscInt       i,j,rstart,ncols,flg;
487   PetscInt       *row,*col;
488   PetscTruth     other_disassembled;
489   PetscScalar    *val;
490   InsertMode     addv = mat->insertmode;
491 
492   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
493   PetscFunctionBegin;
494   if (!aij->donotstash) {
495     while (1) {
496       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
497       if (!flg) break;
498 
499       for (i=0; i<n;) {
500         /* Now identify the consecutive vals belonging to the same row */
501         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
502         if (j < n) ncols = j-i;
503         else       ncols = n-i;
504         /* Now assemble all these values with a single function call */
505         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
506         i = j;
507       }
508     }
509     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
510   }
511   a->compressedrow.use     = PETSC_FALSE;
512   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
513   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
514 
515   /* determine if any processor has disassembled, if so we must
516      also disassemble ourselves, in order that we may reassemble. */
517   /*
518      if nonzero structure of submatrix B cannot change then we know that
519      no processor disassembled thus we can skip this stuff
520   */
521   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
522     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
523     if (mat->was_assembled && !other_disassembled) {
524       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
525     }
526   }
527   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
528     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
529   }
530   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
531   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
532   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
533   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
534 
535   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
536   aij->rowvalues = 0;
537 
538   /* used by MatAXPY() */
539   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
540   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
541 
542   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
543   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
544   PetscFunctionReturn(0);
545 }
546 
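/*
   Usage sketch (public API; the variables are illustrative): mixing
   INSERT_VALUES and ADD_VALUES requires an intermediate flush, which runs
   through the routine above with MAT_FLUSH_ASSEMBLY; the setup in
   MatSetUpMultiply_MPIAIJ() only happens on the first MAT_FINAL_ASSEMBLY.

     ierr = MatSetValues(mat,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&row,1,&col,&w,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/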
547 #undef __FUNCT__
548 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
550 {
551   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
556   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatZeroRows_MPIAIJ"
562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
563 {
564   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
567   PetscInt       i,*owners = A->rmap->range;
568   PetscInt       *nprocs,j,idx,nsends,row;
569   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
570   PetscInt       *rvalues,count,base,slen,*source;
571   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
572   MPI_Comm       comm = ((PetscObject)A)->comm;
573   MPI_Request    *send_waits,*recv_waits;
574   MPI_Status     recv_status,*send_status;
575 #if defined(PETSC_USE_DEBUG)
576   PetscTruth     found = PETSC_FALSE;
577 #endif
578 
579   PetscFunctionBegin;
580   /*  first count number of contributors to each processor */
581   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
582   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
583   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
584   j = 0;
585   for (i=0; i<N; i++) {
586     if (lastidx > (idx = rows[i])) j = 0;
587     lastidx = idx;
588     for (; j<size; j++) {
589       if (idx >= owners[j] && idx < owners[j+1]) {
590         nprocs[2*j]++;
591         nprocs[2*j+1] = 1;
592         owner[i] = j;
593 #if defined(PETSC_USE_DEBUG)
594         found = PETSC_TRUE;
595 #endif
596         break;
597       }
598     }
599 #if defined(PETSC_USE_DEBUG)
600     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
601     found = PETSC_FALSE;
602 #endif
603   }
604   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
605 
606   /* inform other processors of number of messages and max length*/
607   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
608 
609   /* post receives:   */
610   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
611   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
612   for (i=0; i<nrecvs; i++) {
613     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
614   }
615 
616   /* do sends:
617       1) starts[i] gives the starting index in svalues for stuff going to
618          the ith processor
619   */
620   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
621   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
622   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
623   starts[0] = 0;
624   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
625   for (i=0; i<N; i++) {
626     svalues[starts[owner[i]]++] = rows[i];
627   }
628 
629   starts[0] = 0;
630   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
631   count = 0;
632   for (i=0; i<size; i++) {
633     if (nprocs[2*i+1]) {
634       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
635     }
636   }
637   ierr = PetscFree(starts);CHKERRQ(ierr);
638 
639   base = owners[rank];
640 
641   /*  wait on receives */
642   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
643   source = lens + nrecvs;
644   count  = nrecvs; slen = 0;
645   while (count) {
646     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
647     /* unpack receives into our local space */
648     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
649     source[imdex]  = recv_status.MPI_SOURCE;
650     lens[imdex]    = n;
651     slen          += n;
652     count--;
653   }
654   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
655 
656   /* move the data into the send scatter */
657   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
658   count = 0;
659   for (i=0; i<nrecvs; i++) {
660     values = rvalues + i*nmax;
661     for (j=0; j<lens[i]; j++) {
662       lrows[count++] = values[j] - base;
663     }
664   }
665   ierr = PetscFree(rvalues);CHKERRQ(ierr);
666   ierr = PetscFree(lens);CHKERRQ(ierr);
667   ierr = PetscFree(owner);CHKERRQ(ierr);
668   ierr = PetscFree(nprocs);CHKERRQ(ierr);
669 
670   /* actually zap the local rows */
671   /*
672         Zero the required rows. If the "diagonal block" of the matrix
673      is square and the user wishes to set the diagonal we use separate
674      code so that MatSetValues() is not called for each diagonal entry,
675      which would allocate new memory with many mallocs and slow things down.
676 
677   */
678   /* must zero l->B before l->A because the (diag != 0.0) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
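/*
   Usage sketch (public API; the row numbers are made up): each process may
   pass any global rows, and the communication above routes each row to its
   owner before the local zeroing.

     PetscInt rows[2] = {0,17};
     ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);   // keep a unit diagonal
*/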
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
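/*
   What the four calls above compute, written out: with A_d = a->A the
   diagonal block, A_o = a->B the off-diagonal block, and x_g = a->lvec the
   gathered ghost values of x,

       y = A x = A_d x_local + A_o x_g

   and the scatter that fills x_g is overlapped with the local product.
*/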
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMultAdd_MPIAIJ"
745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
746 {
747   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
752   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
753   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
761 {
762   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
763   PetscErrorCode ierr;
764   PetscTruth     merged;
765 
766   PetscFunctionBegin;
767   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
768   /* do nondiagonal part */
769   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
770   if (!merged) {
771     /* send it on its way */
772     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773     /* do local part */
774     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
775     /* receive remote parts: note this assumes the values are not actually */
776     /* added in yy until the next line, */
777     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778   } else {
779     /* do local part */
780     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
781     /* send it on its way */
782     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
783     /* the values were actually received in the Begin() but we still need to call this no-op */
784     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 EXTERN_C_BEGIN
790 #undef __FUNCT__
791 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
793 {
794   MPI_Comm       comm;
795   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
796   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
797   IS             Me,Notme;
798   PetscErrorCode ierr;
799   PetscInt       M,N,first,last,*notme,i;
800   PetscMPIInt    size;
801 
802   PetscFunctionBegin;
803 
804   /* Easy test: symmetric diagonal block */
805   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
806   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
807   if (!*f) PetscFunctionReturn(0);
808   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
809   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
810   if (size == 1) PetscFunctionReturn(0);
811 
812   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
813   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
814   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
815   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
816   for (i=0; i<first; i++) notme[i] = i;
817   for (i=last; i<M; i++) notme[i-last+first] = i;
818   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
819   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
820   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
821   Aoff = Aoffs[0];
822   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
823   Boff = Boffs[0];
824   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
825   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
827   ierr = ISDestroy(Me);CHKERRQ(ierr);
828   ierr = ISDestroy(Notme);CHKERRQ(ierr);
829 
830   PetscFunctionReturn(0);
831 }
832 EXTERN_C_END
833 
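/*
   The structure of the test above: A equals B^T iff (1) each diagonal block
   of A is the transpose of the matching diagonal block of B, and (2) each
   process's strip A(me,notme) is the transpose of B(notme,me); the two
   MatGetSubMatrices() calls extract exactly those complementary strips.
*/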
834 #undef __FUNCT__
835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
837 {
838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* do nondiagonal part */
843   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
844   /* send it on its way */
845   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
846   /* do local part */
847   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
848   /* receive remote parts */
849   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 /*
854   This only works correctly for square matrices where the subblock A->A is the
855    diagonal block
856 */
857 #undef __FUNCT__
858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
860 {
861   PetscErrorCode ierr;
862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
863 
864   PetscFunctionBegin;
865   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
866   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
868   }
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
900   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   PetscFunctionReturn(0);
922 }
923 
924 #undef __FUNCT__
925 #define __FUNCT__ "MatView_MPIAIJ_Binary"
926 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
927 {
928   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
929   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
930   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
931   PetscErrorCode    ierr;
932   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
933   int               fd;
934   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
935   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
936   PetscScalar       *column_values;
937 
938   PetscFunctionBegin;
939   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
940   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
941   nz   = A->nz + B->nz;
942   if (!rank) {
943     header[0] = MAT_FILE_COOKIE;
944     header[1] = mat->rmap->N;
945     header[2] = mat->cmap->N;
946     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
947     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
948     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
949     /* get largest number of rows any processor has */
950     rlen = mat->rmap->n;
951     range = mat->rmap->range;
952     for (i=1; i<size; i++) {
953       rlen = PetscMax(rlen,range[i+1] - range[i]);
954     }
955   } else {
956     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
957     rlen = mat->rmap->n;
958   }
959 
960   /* load up the local row counts */
961   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
962   for (i=0; i<mat->rmap->n; i++) {
963     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
964   }
965 
966   /* store the row lengths to the file */
967   if (!rank) {
968     MPI_Status status;
969     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
970     for (i=1; i<size; i++) {
971       rlen = range[i+1] - range[i];
972       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
973       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
974     }
975   } else {
976     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
977   }
978   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
979 
980   /* load up the local column indices */
981   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
982   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
983   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
984   cnt  = 0;
985   for (i=0; i<mat->rmap->n; i++) {
986     for (j=B->i[i]; j<B->i[i+1]; j++) {
987       if ( (col = garray[B->j[j]]) > cstart) break;
988       column_indices[cnt++] = col;
989     }
990     for (k=A->i[i]; k<A->i[i+1]; k++) {
991       column_indices[cnt++] = A->j[k] + cstart;
992     }
993     for (; j<B->i[i+1]; j++) {
994       column_indices[cnt++] = garray[B->j[j]];
995     }
996   }
997   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
998 
999   /* store the column indices to the file */
1000   if (!rank) {
1001     MPI_Status status;
1002     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1003     for (i=1; i<size; i++) {
1004       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1005       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1006       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1007       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1008     }
1009   } else {
1010     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1011     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012   }
1013   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1014 
1015   /* load up the local column values */
1016   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1017   cnt  = 0;
1018   for (i=0; i<mat->rmap->n; i++) {
1019     for (j=B->i[i]; j<B->i[i+1]; j++) {
1020       if ( garray[B->j[j]] > cstart) break;
1021       column_values[cnt++] = B->a[j];
1022     }
1023     for (k=A->i[i]; k<A->i[i+1]; k++) {
1024       column_values[cnt++] = A->a[k];
1025     }
1026     for (; j<B->i[i+1]; j++) {
1027       column_values[cnt++] = B->a[j];
1028     }
1029   }
1030   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1031 
1032   /* store the column values to the file */
1033   if (!rank) {
1034     MPI_Status status;
1035     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1036     for (i=1; i<size; i++) {
1037       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1038       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1039       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1040       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1041     }
1042   } else {
1043     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1044     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045   }
1046   ierr = PetscFree(column_values);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
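/*
   Usage sketch (public API; the file name is illustrative): saving an MPIAIJ
   matrix with a binary viewer ends up in the routine above whenever more
   than one process is involved.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(((PetscObject)mat)->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/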
1050 #undef __FUNCT__
1051 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1052 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1053 {
1054   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1055   PetscErrorCode    ierr;
1056   PetscMPIInt       rank = aij->rank,size = aij->size;
1057   PetscTruth        isdraw,iascii,isbinary;
1058   PetscViewer       sviewer;
1059   PetscViewerFormat format;
1060 
1061   PetscFunctionBegin;
1062   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1065   if (iascii) {
1066     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1067     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1068       MatInfo    info;
1069       PetscTruth inodes;
1070 
1071       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1072       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1073       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1074       if (!inodes) {
1075         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1076                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1077       } else {
1078         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1079                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1080       }
1081       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1082       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1083       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1084       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1085       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1086       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1087       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1088       PetscFunctionReturn(0);
1089     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1090       PetscInt   inodecount,inodelimit,*inodes;
1091       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1092       if (inodes) {
1093         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1094       } else {
1095         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1096       }
1097       PetscFunctionReturn(0);
1098     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1099       PetscFunctionReturn(0);
1100     }
1101   } else if (isbinary) {
1102     if (size == 1) {
1103       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1104       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1105     } else {
1106       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1107     }
1108     PetscFunctionReturn(0);
1109   } else if (isdraw) {
1110     PetscDraw  draw;
1111     PetscTruth isnull;
1112     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1113     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1114   }
1115 
1116   if (size == 1) {
1117     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1118     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1119   } else {
1120     /* assemble the entire matrix onto first processor. */
1121     Mat         A;
1122     Mat_SeqAIJ  *Aloc;
1123     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1124     MatScalar   *a;
1125 
1126     if (mat->rmap->N > 1024) {
1127       PetscTruth flg = PETSC_FALSE;
1128 
1129       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1130       if (!flg) {
1131         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1132       }
1133     }
1134 
1135     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1136     if (!rank) {
1137       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1138     } else {
1139       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1140     }
1141     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1142     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1143     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1144     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1145 
1146     /* copy over the A part */
1147     Aloc = (Mat_SeqAIJ*)aij->A->data;
1148     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1149     row = mat->rmap->rstart;
1150     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1151     for (i=0; i<m; i++) {
1152       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1153       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1154     }
1155     aj = Aloc->j;
1156     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1157 
1158     /* copy over the B part */
1159     Aloc = (Mat_SeqAIJ*)aij->B->data;
1160     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1161     row  = mat->rmap->rstart;
1162     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1163     ct   = cols;
1164     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1165     for (i=0; i<m; i++) {
1166       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1167       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1168     }
1169     ierr = PetscFree(ct);CHKERRQ(ierr);
1170     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1171     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     /*
1173        Everyone has to participate in the call to draw the matrix since the graphics waits are
1174        synchronized across all processors that share the PetscDraw object
1175     */
1176     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1177     if (!rank) {
1178       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1179       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1180     }
1181     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1182     ierr = MatDestroy(A);CHKERRQ(ierr);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatView_MPIAIJ"
1189 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1190 {
1191   PetscErrorCode ierr;
1192   PetscTruth     iascii,isdraw,issocket,isbinary;
1193 
1194   PetscFunctionBegin;
1195   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1197   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1199   if (iascii || isdraw || isbinary || issocket) {
1200     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1201   } else {
1202     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatRelax_MPIAIJ"
1209 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1210 {
1211   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1212   PetscErrorCode ierr;
1213   Vec            bb1 = 0;
1214   PetscTruth     hasop;
1215 
1216   PetscFunctionBegin;
1217   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1218     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1219   }
1220 
1221   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1222     if (flag & SOR_ZERO_INITIAL_GUESS) {
1223       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1224       its--;
1225     }
1226 
1227     while (its--) {
1228       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230 
1231       /* update rhs: bb1 = bb - B*x */
1232       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1233       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1234 
1235       /* local sweep */
1236       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1237     }
1238   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1239     if (flag & SOR_ZERO_INITIAL_GUESS) {
1240       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1241       its--;
1242     }
1243     while (its--) {
1244       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246 
1247       /* update rhs: bb1 = bb - B*x */
1248       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1249       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1250 
1251       /* local sweep */
1252       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1253     }
1254   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1255     if (flag & SOR_ZERO_INITIAL_GUESS) {
1256       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1257       its--;
1258     }
1259     while (its--) {
1260       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1262 
1263       /* update rhs: bb1 = bb - B*x */
1264       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1265       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1266 
1267       /* local sweep */
1268       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1269     }
1270   }  else if (flag & SOR_EISENSTAT) {
1271     Vec         xx1;
1272 
1273     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1274     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1275 
1276     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1277     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1278     if (!mat->diag) {
1279       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1280       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1281     }
1282     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1283     if (hasop) {
1284       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1285     } else {
1286       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1287     }
1288     ierr = VecAYPX(bb1,-1.0,bb);CHKERRQ(ierr);
1289     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1290 
1291     /* local sweep */
1292     ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1293     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1294     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1295   } else {
1296     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1297   }
1298 
1299   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1300   PetscFunctionReturn(0);
1301 }
1302 
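/*
    The row permutation is gathered onto every process and inverted, so each
    process can read off the target indices for its own row block; the column
    permutation only needs a local (inverted) copy. The permuted matrix is
    then extracted with MatGetSubMatrix_MPIAIJ_Private().
*/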
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatPermute_MPIAIJ"
1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1306 {
1307   MPI_Comm       comm,pcomm;
1308   PetscInt       first,local_size,nrows;
1309   const PetscInt *rows;
1310   PetscMPIInt    size;
1311   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1316   /* make a collective version of 'rowp' */
1317   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1318   if (pcomm==comm) {
1319     crowp = rowp;
1320   } else {
1321     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1322     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1323     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1324     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1325   }
1326   /* collect the global row permutation and invert it */
1327   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1328   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1329   if (pcomm!=comm) {
1330     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1331   }
1332   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1333   /* get the local target indices */
1334   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1335   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1336   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1337   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1338   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1339   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1340   /* the column permutation is so much easier;
1341      make a local version of 'colp' and invert it */
1342   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1343   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1344   if (size==1) {
1345     lcolp = colp;
1346   } else {
1347     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1348     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1349     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1350   }
1351   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1352   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1353   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1354   if (size>1) {
1355     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1357   }
1358   /* now we just get the submatrix */
1359   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1360   /* clean up */
1361   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1362   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369 {
1370   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1371   Mat            A = mat->A,B = mat->B;
1372   PetscErrorCode ierr;
1373   PetscReal      isend[5],irecv[5];
1374 
1375   PetscFunctionBegin;
1376   info->block_size     = 1.0;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   }
1404   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1405   info->fill_ratio_needed = 0;
1406   info->factor_mallocs    = 0;
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatSetOption_MPIAIJ"
1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1414 {
1415   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   switch (op) {
1420   case MAT_NEW_NONZERO_LOCATIONS:
1421   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1422   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1423   case MAT_KEEP_NONZERO_PATTERN:
1424   case MAT_NEW_NONZERO_LOCATION_ERR:
1425   case MAT_USE_INODES:
1426   case MAT_IGNORE_ZERO_ENTRIES:
1427     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1428     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1429     break;
1430   case MAT_ROW_ORIENTED:
1431     a->roworiented = flg;
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1434     break;
1435   case MAT_NEW_DIAGONALS:
1436     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1437     break;
1438   case MAT_IGNORE_OFF_PROC_ENTRIES:
1439     a->donotstash = PETSC_TRUE;
1440     break;
1441   case MAT_SYMMETRIC:
1444   case MAT_STRUCTURALLY_SYMMETRIC:
1445   case MAT_HERMITIAN:
1446   case MAT_SYMMETRY_ETERNAL:
1447     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1448     break;
1449   default:
1450     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1451   }
1452   PetscFunctionReturn(0);
1453 }
1454 
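/*
    A row of an MPIAIJ matrix lives in two pieces: the diagonal block A, whose
    global column indices are cstart plus the local index, and the
    off-diagonal block B, whose global column indices come from the garray
    map. The two pieces are merged in increasing global column order, relying
    on both blocks keeping their column indices sorted.
*/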
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatGetRow_MPIAIJ"
1457 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1458 {
1459   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1460   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1461   PetscErrorCode ierr;
1462   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1463   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1464   PetscInt       *cmap,*idx_p;
1465 
1466   PetscFunctionBegin;
1467   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1468   mat->getrowactive = PETSC_TRUE;
1469 
1470   if (!mat->rowvalues && (idx || v)) {
1471     /*
1472         allocate enough space to hold information from the longest row.
1473     */
1474     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1475     PetscInt     max = 1,tmp;
1476     for (i=0; i<matin->rmap->n; i++) {
1477       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1478       if (max < tmp) { max = tmp; }
1479     }
1480     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1481     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1482   }
1483 
1484   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows may be accessed");
1485   lrow = row - rstart;
1486 
1487   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1488   if (!v)   {pvA = 0; pvB = 0;}
1489   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1490   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1491   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1492   nztot = nzA + nzB;
1493 
1494   cmap  = mat->garray;
1495   if (v  || idx) {
1496     if (nztot) {
1497       /* Sort by increasing column numbers, assuming A and B already sorted */
1498       PetscInt imark = -1;
1499       if (v) {
1500         *v = v_p = mat->rowvalues;
1501         for (i=0; i<nzB; i++) {
1502           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1503           else break;
1504         }
1505         imark = i;
1506         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1507         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1508       }
1509       if (idx) {
1510         *idx = idx_p = mat->rowindices;
1511         if (imark > -1) {
1512           for (i=0; i<imark; i++) {
1513             idx_p[i] = cmap[cworkB[i]];
1514           }
1515         } else {
1516           for (i=0; i<nzB; i++) {
1517             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1518             else break;
1519           }
1520           imark = i;
1521         }
1522         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1523         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1524       }
1525     } else {
1526       if (idx) *idx = 0;
1527       if (v)   *v   = 0;
1528     }
1529   }
1530   *nz = nztot;
1531   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1532   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1538 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1539 {
1540   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1541 
1542   PetscFunctionBegin;
1543   if (!aij->getrowactive) {
1544     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1545   }
1546   aij->getrowactive = PETSC_FALSE;
1547   PetscFunctionReturn(0);
1548 }
1549 
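/*
    Each norm is assembled from the two sequential blocks:
      NORM_FROBENIUS - local sum of squares, MPI_SUM reduction, square root;
      NORM_1         - per-column absolute sums accumulated in a work array of
                       the global column dimension, MPI_SUM reduction, maximum;
      NORM_INFINITY  - per-row absolute sums over both blocks, MPI_MAX reduction.
    The two norm is not supported in parallel.
*/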
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatNorm_MPIAIJ"
1552 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1553 {
1554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1555   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1556   PetscErrorCode ierr;
1557   PetscInt       i,j,cstart = mat->cmap->rstart;
1558   PetscReal      sum = 0.0;
1559   MatScalar      *v;
1560 
1561   PetscFunctionBegin;
1562   if (aij->size == 1) {
1563     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1564   } else {
1565     if (type == NORM_FROBENIUS) {
1566       v = amat->a;
1567       for (i=0; i<amat->nz; i++) {
1568 #if defined(PETSC_USE_COMPLEX)
1569         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1570 #else
1571         sum += (*v)*(*v); v++;
1572 #endif
1573       }
1574       v = bmat->a;
1575       for (i=0; i<bmat->nz; i++) {
1576 #if defined(PETSC_USE_COMPLEX)
1577         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1578 #else
1579         sum += (*v)*(*v); v++;
1580 #endif
1581       }
1582       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1583       *norm = sqrt(*norm);
1584     } else if (type == NORM_1) { /* max column norm */
1585       PetscReal *tmp,*tmp2;
1586       PetscInt  *jj,*garray = aij->garray;
1587       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1588       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1589       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1590       *norm = 0.0;
1591       v = amat->a; jj = amat->j;
1592       for (j=0; j<amat->nz; j++) {
1593         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1594       }
1595       v = bmat->a; jj = bmat->j;
1596       for (j=0; j<bmat->nz; j++) {
1597         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1598       }
1599       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1600       for (j=0; j<mat->cmap->N; j++) {
1601         if (tmp2[j] > *norm) *norm = tmp2[j];
1602       }
1603       ierr = PetscFree(tmp);CHKERRQ(ierr);
1604       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1605     } else if (type == NORM_INFINITY) { /* max row norm */
1606       PetscReal ntemp = 0.0;
1607       for (j=0; j<aij->A->rmap->n; j++) {
1608         v = amat->a + amat->i[j];
1609         sum = 0.0;
1610         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1611           sum += PetscAbsScalar(*v); v++;
1612         }
1613         v = bmat->a + bmat->i[j];
1614         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1615           sum += PetscAbsScalar(*v); v++;
1616         }
1617         if (sum > ntemp) ntemp = sum;
1618       }
1619       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1620     } else {
1621       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1622     }
1623   }
1624   PetscFunctionReturn(0);
1625 }
1626 
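/*
    The transpose is built by inserting every local row of the diagonal block
    (column indices temporarily shifted to global numbering) and of the
    off-diagonal block (translated through garray) as a column of the result
    with MatSetValues(); the assembly stash routes the off-process entries.
    d_nnz is the exact column count of the local diagonal block and doubles as
    an estimate for o_nnz, avoiding a communication round.
*/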
1627 #undef __FUNCT__
1628 #define __FUNCT__ "MatTranspose_MPIAIJ"
1629 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1630 {
1631   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1632   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1633   PetscErrorCode ierr;
1634   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1635   PetscInt       cstart=A->cmap->rstart,ncol;
1636   Mat            B;
1637   MatScalar      *array;
1638 
1639   PetscFunctionBegin;
1640   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1641 
1642   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1643   ai = Aloc->i; aj = Aloc->j;
1644   bi = Bloc->i; bj = Bloc->j;
1645   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1646     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1647     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1648     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1649     for (i=0; i<ai[ma]; i++){
1650       d_nnz[aj[i]] ++;
1651       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1652     }
1653 
1654     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1655     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1656     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1657     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1658     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1659   } else {
1660     B = *matout;
1661   }
1662 
1663   /* copy over the A part */
1664   array = Aloc->a;
1665   row = A->rmap->rstart;
1666   for (i=0; i<ma; i++) {
1667     ncol = ai[i+1]-ai[i];
1668     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1669     row++; array += ncol; aj += ncol;
1670   }
1671   aj = Aloc->j;
1672   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1673 
1674   /* copy over the B part */
1675   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1676   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1677   array = Bloc->a;
1678   row = A->rmap->rstart;
1679   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1680   cols_tmp = cols;
1681   for (i=0; i<mb; i++) {
1682     ncol = bi[i+1]-bi[i];
1683     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1684     row++; array += ncol; cols_tmp += ncol;
1685   }
1686   ierr = PetscFree(cols);CHKERRQ(ierr);
1687 
1688   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1689   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1691     *matout = B;
1692   } else {
1693     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1694   }
1695   PetscFunctionReturn(0);
1696 }
1697 
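/*
    Left scaling touches only local rows, so ll is applied to both blocks
    directly. Right scaling of the off-diagonal block needs the ghost entries
    of rr, so the scatter is started first and completed only after the
    diagonal block has been scaled, overlapping communication with
    computation.
*/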
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1700 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1701 {
1702   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1703   Mat            a = aij->A,b = aij->B;
1704   PetscErrorCode ierr;
1705   PetscInt       s1,s2,s3;
1706 
1707   PetscFunctionBegin;
1708   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1709   if (rr) {
1710     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1711     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1712     /* Overlap communication with computation. */
1713     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1714   }
1715   if (ll) {
1716     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1717     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1718     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1719   }
1720   /* scale the diagonal block */
1721   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1722 
1723   if (rr) {
1724     /* Do a scatter end and then right scale the off-diagonal block */
1725     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1726     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1727   }
1728 
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1734 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1735 {
1736   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1741   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1742   PetscFunctionReturn(0);
1743 }
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1746 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1747 {
1748   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1749   PetscErrorCode ierr;
1750 
1751   PetscFunctionBegin;
1752   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1753   PetscFunctionReturn(0);
1754 }
1755 
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatEqual_MPIAIJ"
1758 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1759 {
1760   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1761   Mat            a,b,c,d;
1762   PetscTruth     flg;
1763   PetscErrorCode ierr;
1764 
1765   PetscFunctionBegin;
1766   a = matA->A; b = matA->B;
1767   c = matB->A; d = matB->B;
1768 
1769   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1770   if (flg) {
1771     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1772   }
1773   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1774   PetscFunctionReturn(0);
1775 }
1776 
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatCopy_MPIAIJ"
1779 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1780 {
1781   PetscErrorCode ierr;
1782   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1783   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1784 
1785   PetscFunctionBegin;
1786   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1787   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1788     /* because of the column compression in the off-process part of the matrix a->B,
1789        the number of columns in a->B and b->B may differ, hence we cannot call
1790        MatCopy() directly on the two parts. If need be, a copy more efficient than
1791        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1792        and then copying the submatrices */
1793     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1794   } else {
1795     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1796     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1797   }
1798   PetscFunctionReturn(0);
1799 }
1800 
1801 #undef __FUNCT__
1802 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1803 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1804 {
1805   PetscErrorCode ierr;
1806 
1807   PetscFunctionBegin;
1808   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 #include "petscblaslapack.h"
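/*
    Y = a*X + Y. With SAME_NONZERO_PATTERN the value arrays of matching blocks
    are combined directly with a BLAS axpy. With SUBSET_NONZERO_PATTERN the
    diagonal part is delegated to MatAXPY_SeqAIJ(), while for the off-diagonal
    block a map xtoy from X's nonzeros into Y's value array is built once and
    cached, so repeated calls reduce to a gather-add. Any other structure
    falls back to MatAXPY_Basic().
*/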
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatAXPY_MPIAIJ"
1815 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1816 {
1817   PetscErrorCode ierr;
1818   PetscInt       i;
1819   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1820   PetscBLASInt   bnz,one=1;
1821   Mat_SeqAIJ     *x,*y;
1822 
1823   PetscFunctionBegin;
1824   if (str == SAME_NONZERO_PATTERN) {
1825     PetscScalar alpha = a;
1826     x = (Mat_SeqAIJ *)xx->A->data;
1827     y = (Mat_SeqAIJ *)yy->A->data;
1828     bnz = PetscBLASIntCast(x->nz);
1829     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1830     x = (Mat_SeqAIJ *)xx->B->data;
1831     y = (Mat_SeqAIJ *)yy->B->data;
1832     bnz = PetscBLASIntCast(x->nz);
1833     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1834   } else if (str == SUBSET_NONZERO_PATTERN) {
1835     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1836 
1837     x = (Mat_SeqAIJ *)xx->B->data;
1838     y = (Mat_SeqAIJ *)yy->B->data;
1839     if (y->xtoy && y->XtoY != xx->B) {
1840       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1841       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1842     }
1843     if (!y->xtoy) { /* get xtoy */
1844       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1845       y->XtoY = xx->B;
1846       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1847     }
1848     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1849   } else {
1850     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1851   }
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatConjugate_MPIAIJ"
1859 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1860 {
1861 #if defined(PETSC_USE_COMPLEX)
1862   PetscErrorCode ierr;
1863   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1864 
1865   PetscFunctionBegin;
1866   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1867   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1868 #else
1869   PetscFunctionBegin;
1870 #endif
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatRealPart_MPIAIJ"
1876 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1877 {
1878   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1879   PetscErrorCode ierr;
1880 
1881   PetscFunctionBegin;
1882   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1883   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 #undef __FUNCT__
1888 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1889 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1890 {
1891   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1892   PetscErrorCode ierr;
1893 
1894   PetscFunctionBegin;
1895   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1896   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 #ifdef PETSC_HAVE_PBGL
1901 
1902 #include <boost/parallel/mpi/bsp_process_group.hpp>
1903 #include <boost/graph/distributed/ilu_default_graph.hpp>
1904 #include <boost/graph/distributed/ilu_0_block.hpp>
1905 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1906 #include <boost/graph/distributed/petsc/interface.hpp>
1907 #include <boost/multi_array.hpp>
1908 #include <boost/parallel/distributed_property_map.hpp>
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1912 /*
1913   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1914 */
1915 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1916 {
1917   namespace petsc = boost::distributed::petsc;
1918 
1919   namespace graph_dist = boost::graph::distributed;
1920   using boost::graph::distributed::ilu_default::process_group_type;
1921   using boost::graph::ilu_permuted;
1922 
1923   PetscTruth      row_identity, col_identity;
1924   PetscContainer  c;
1925   PetscInt        m, n, M, N;
1926   PetscErrorCode  ierr;
1927 
1928   PetscFunctionBegin;
1929   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1930   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1931   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1932   if (!row_identity || !col_identity) {
1933     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1934   }
1935 
1936   process_group_type pg;
1937   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1938   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1939   lgraph_type&   level_graph = *lgraph_p;
1940   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1941 
1942   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1943   ilu_permuted(level_graph);
1944 
1945   /* put together the new matrix */
1946   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1947   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1948   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1949   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1950   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1951   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1952   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1953 
1954   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1955   ierr = PetscContainerSetPointer(c, lgraph_p);
1956   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1962 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1963 {
1964   PetscFunctionBegin;
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatSolve_MPIAIJ"
1970 /*
1971   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1972 */
1973 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1974 {
1975   namespace graph_dist = boost::graph::distributed;
1976 
1977   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1978   lgraph_type*   lgraph_p;
1979   PetscContainer c;
1980   PetscErrorCode ierr;
1981 
1982   PetscFunctionBegin;
1983   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1984   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1985   ierr = VecCopy(b, x);CHKERRQ(ierr);
1986 
1987   PetscScalar* array_x;
1988   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1989   PetscInt sx;
1990   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1991 
1992   PetscScalar* array_b;
1993   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1994   PetscInt sb;
1995   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1996 
1997   lgraph_type&   level_graph = *lgraph_p;
1998   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1999 
2000   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2001   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2002                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2003 
2004   typedef boost::iterator_property_map<array_ref_type::iterator,
2005                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2006   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2007                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2008 
2009   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2010 
2011   PetscFunctionReturn(0);
2012 }
2013 #endif
2014 
2015 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2016   PetscInt       nzlocal,nsends,nrecvs;
2017   PetscMPIInt    *send_rank;
2018   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
2019   PetscScalar    *sbuf_a,**rbuf_a;
2020   PetscErrorCode (*MatDestroy)(Mat);
2021 } Mat_Redundant;
2022 
2023 #undef __FUNCT__
2024 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2025 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2026 {
2027   PetscErrorCode       ierr;
2028   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2029   PetscInt             i;
2030 
2031   PetscFunctionBegin;
2032   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2033   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2034   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2035   for (i=0; i<redund->nrecvs; i++){
2036     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2037     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2038   }
2039   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2040   ierr = PetscFree(redund);CHKERRQ(ierr);
2041   PetscFunctionReturn(0);
2042 }
2043 
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatDestroy_MatRedundant"
2046 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2047 {
2048   PetscErrorCode  ierr;
2049   PetscContainer  container;
2050   Mat_Redundant   *redund=PETSC_NULL;
2051 
2052   PetscFunctionBegin;
2053   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2054   if (container) {
2055     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2056   } else {
2057     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2058   }
2059   A->ops->destroy = redund->MatDestroy;
2060   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2061   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2062   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2063   PetscFunctionReturn(0);
2064 }
2065 
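/*
    Builds a complete (redundant) copy of the parallel matrix on each
    subcommunicator. A process keeps its own rows and exchanges rows with the
    processes holding the same subrank on the other subcommunicators; leftover
    processes (when size is not divisible by nsubcomm) ship their rows to the
    tail ranks of existing subcommunicators. Three message rounds are used:
    nonzero counts (tag1), row pointers plus column indices (tag2), and
    numerical values (tag3). With MAT_REUSE_MATRIX only the tag3 round runs,
    reusing the buffers cached in the Mat_Redundant container attached to the
    result.
*/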
2066 #undef __FUNCT__
2067 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2068 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2069 {
2070   PetscMPIInt    rank,size;
2071   MPI_Comm       comm=((PetscObject)mat)->comm;
2072   PetscErrorCode ierr;
2073   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2074   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2075   PetscInt       *rowrange=mat->rmap->range;
2076   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2077   Mat            A=aij->A,B=aij->B,C=*matredundant;
2078   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2079   PetscScalar    *sbuf_a;
2080   PetscInt       nzlocal=a->nz+b->nz;
2081   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2082   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2083   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2084   MatScalar      *aworkA,*aworkB;
2085   PetscScalar    *vals;
2086   PetscMPIInt    tag1,tag2,tag3,imdex;
2087   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2088                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2089   MPI_Status     recv_status,*send_status;
2090   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2091   PetscInt       **rbuf_j=PETSC_NULL;
2092   PetscScalar    **rbuf_a=PETSC_NULL;
2093   Mat_Redundant  *redund=PETSC_NULL;
2094   PetscContainer container;
2095 
2096   PetscFunctionBegin;
2097   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2098   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2099 
2100   if (reuse == MAT_REUSE_MATRIX) {
2101     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2102     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2103     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2104     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2105     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2106     if (container) {
2107       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2108     } else {
2109       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2110     }
2111     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2112 
2113     nsends    = redund->nsends;
2114     nrecvs    = redund->nrecvs;
2115     send_rank = redund->send_rank; recv_rank = send_rank + size;
2116     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2117     sbuf_j    = redund->sbuf_j;
2118     sbuf_a    = redund->sbuf_a;
2119     rbuf_j    = redund->rbuf_j;
2120     rbuf_a    = redund->rbuf_a;
2121   }
2122 
2123   if (reuse == MAT_INITIAL_MATRIX){
2124     PetscMPIInt  subrank,subsize;
2125     PetscInt     nleftover,np_subcomm;
2126     /* get the destination processors' id send_rank, nsends and nrecvs */
2127     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2128     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2129     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2130     recv_rank = send_rank + size;
2131     np_subcomm = size/nsubcomm;
2132     nleftover  = size - nsubcomm*np_subcomm;
2133     nsends = 0; nrecvs = 0;
2134     for (i=0; i<size; i++){ /* i = global rank */
2135       if (subrank == i/nsubcomm && rank != i){ /* same subrank on another subcommunicator: exchange local rows */
2136         send_rank[nsends++] = i;
2137         recv_rank[nrecvs++] = i;
2138       }
2139     }
2140     if (rank >= size - nleftover){/* this proc is a leftover processor */
2141       i = size-nleftover-1;
2142       j = 0;
2143       while (j < nsubcomm - nleftover){
2144         send_rank[nsends++] = i;
2145         i--; j++;
2146       }
2147     }
2148 
2149     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2150       for (i=0; i<nleftover; i++){
2151         recv_rank[nrecvs++] = size-nleftover+i;
2152       }
2153     }
2154 
2155     /* allocate sbuf_j, sbuf_a */
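    /* layout of sbuf_j: [row pointers (rend-rstart+1 entries) | global column
       indices (nzlocal entries)]; sbuf_a holds the matching numerical values */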
2156     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2157     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2158     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2159   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2160 
2161   /* copy mat's local entries into the buffers */
2162   if (reuse == MAT_INITIAL_MATRIX){
2163     rownz_max = 0;
2164     rptr = sbuf_j;
2165     cols = sbuf_j + rend-rstart + 1;
2166     vals = sbuf_a;
2167     rptr[0] = 0;
2168     for (i=0; i<rend-rstart; i++){
2169       row = i + rstart;
2170       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2171       ncols  = nzA + nzB;
2172       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2173       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2174       /* load the column indices for this row into cols */
2175       lwrite = 0;
2176       for (l=0; l<nzB; l++) {
2177         if ((ctmp = bmap[cworkB[l]]) < cstart){
2178           vals[lwrite]   = aworkB[l];
2179           cols[lwrite++] = ctmp;
2180         }
2181       }
2182       for (l=0; l<nzA; l++){
2183         vals[lwrite]   = aworkA[l];
2184         cols[lwrite++] = cstart + cworkA[l];
2185       }
2186       for (l=0; l<nzB; l++) {
2187         if ((ctmp = bmap[cworkB[l]]) >= cend){
2188           vals[lwrite]   = aworkB[l];
2189           cols[lwrite++] = ctmp;
2190         }
2191       }
2192       vals += ncols;
2193       cols += ncols;
2194       rptr[i+1] = rptr[i] + ncols;
2195       if (rownz_max < ncols) rownz_max = ncols;
2196     }
2197     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2198   } else { /* only copy matrix values into sbuf_a */
2199     rptr = sbuf_j;
2200     vals = sbuf_a;
2201     rptr[0] = 0;
2202     for (i=0; i<rend-rstart; i++){
2203       row = i + rstart;
2204       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2205       ncols  = nzA + nzB;
2206       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2207       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2208       lwrite = 0;
2209       for (l=0; l<nzB; l++) {
2210         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2211       }
2212       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2213       for (l=0; l<nzB; l++) {
2214         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2215       }
2216       vals += ncols;
2217       rptr[i+1] = rptr[i] + ncols;
2218     }
2219   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2220 
2221   /* send nzlocal to others, and recv other's nzlocal */
2222   /*--------------------------------------------------*/
2223   if (reuse == MAT_INITIAL_MATRIX){
2224     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2225     s_waits2 = s_waits3 + nsends;
2226     s_waits1 = s_waits2 + nsends;
2227     r_waits1 = s_waits1 + nsends;
2228     r_waits2 = r_waits1 + nrecvs;
2229     r_waits3 = r_waits2 + nrecvs;
2230   } else {
2231     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2232     r_waits3 = s_waits3 + nsends;
2233   }
2234 
2235   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2236   if (reuse == MAT_INITIAL_MATRIX){
2237     /* get new tags to keep the communication clean */
2238     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2239     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2240     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2241     rbuf_nz = sbuf_nz + nsends;
2242 
2243     /* post receives of other's nzlocal */
2244     for (i=0; i<nrecvs; i++){
2245       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2246     }
2247     /* send nzlocal to others */
2248     for (i=0; i<nsends; i++){
2249       sbuf_nz[i] = nzlocal;
2250       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2251     }
2252     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2253     count = nrecvs;
2254     while (count) {
2255       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2256       recv_rank[imdex] = recv_status.MPI_SOURCE;
2257       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2258       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2259 
2260       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2261       rbuf_nz[imdex] += i + 2;
2262       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2263       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2264       count--;
2265     }
2266     /* wait on sends of nzlocal */
2267     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2268     /* send mat->i,j to others, and recv from other's */
2269     /*------------------------------------------------*/
2270     for (i=0; i<nsends; i++){
2271       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2272       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2273     }
2274     /* wait on receives of mat->i,j */
2275     /*------------------------------*/
2276     count = nrecvs;
2277     while (count) {
2278       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2279       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2280       count--;
2281     }
2282     /* wait on sends of mat->i,j */
2283     /*---------------------------*/
2284     if (nsends) {
2285       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2286     }
2287   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2288 
2289   /* post receives, send and receive mat->a */
2290   /*----------------------------------------*/
2291   for (imdex=0; imdex<nrecvs; imdex++) {
2292     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2293   }
2294   for (i=0; i<nsends; i++){
2295     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2296   }
2297   count = nrecvs;
2298   while (count) {
2299     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2300     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2301     count--;
2302   }
2303   if (nsends) {
2304     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2305   }
2306 
2307   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2308 
2309   /* create redundant matrix */
2310   /*-------------------------*/
2311   if (reuse == MAT_INITIAL_MATRIX){
2312     /* compute rownz_max for preallocation */
2313     for (imdex=0; imdex<nrecvs; imdex++){
2314       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2315       rptr = rbuf_j[imdex];
2316       for (i=0; i<j; i++){
2317         ncols = rptr[i+1] - rptr[i];
2318         if (rownz_max < ncols) rownz_max = ncols;
2319       }
2320     }
2321 
2322     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2323     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2324     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2325     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2326     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2327   } else {
2328     C = *matredundant;
2329   }
2330 
2331   /* insert local matrix entries */
2332   rptr = sbuf_j;
2333   cols = sbuf_j + rend-rstart + 1;
2334   vals = sbuf_a;
2335   for (i=0; i<rend-rstart; i++){
2336     row   = i + rstart;
2337     ncols = rptr[i+1] - rptr[i];
2338     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2339     vals += ncols;
2340     cols += ncols;
2341   }
2342   /* insert received matrix entries */
2343   for (imdex=0; imdex<nrecvs; imdex++){
2344     rstart = rowrange[recv_rank[imdex]];
2345     rend   = rowrange[recv_rank[imdex]+1];
2346     rptr = rbuf_j[imdex];
2347     cols = rbuf_j[imdex] + rend-rstart + 1;
2348     vals = rbuf_a[imdex];
2349     for (i=0; i<rend-rstart; i++){
2350       row   = i + rstart;
2351       ncols = rptr[i+1] - rptr[i];
2352       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2353       vals += ncols;
2354       cols += ncols;
2355     }
2356   }
2357   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2358   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2359   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2360   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2361   if (reuse == MAT_INITIAL_MATRIX){
2362     PetscContainer container;
2363     *matredundant = C;
2364     /* create a supporting struct and attach it to C for reuse */
2365     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2366     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2367     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2368     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2369     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2370 
2371     redund->nzlocal = nzlocal;
2372     redund->nsends  = nsends;
2373     redund->nrecvs  = nrecvs;
2374     redund->send_rank = send_rank;
2375     redund->sbuf_nz = sbuf_nz;
2376     redund->sbuf_j  = sbuf_j;
2377     redund->sbuf_a  = sbuf_a;
2378     redund->rbuf_j  = rbuf_j;
2379     redund->rbuf_a  = rbuf_a;
2380 
2381     redund->MatDestroy = C->ops->destroy;
2382     C->ops->destroy    = MatDestroy_MatRedundant;
2383   }
2384   PetscFunctionReturn(0);
2385 }
2386 
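/*
    The row max/min routines below evaluate the candidate on the diagonal and
    off-diagonal blocks separately and merge the two results; the winning
    column index is translated to global numbering with cmap->rstart for the
    diagonal block and garray for the off-diagonal one.
*/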
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2389 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2390 {
2391   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2392   PetscErrorCode ierr;
2393   PetscInt       i,*idxb = 0;
2394   PetscScalar    *va,*vb;
2395   Vec            vtmp;
2396 
2397   PetscFunctionBegin;
2398   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2399   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2400   if (idx) {
2401     for (i=0; i<A->rmap->n; i++) {
2402       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2403     }
2404   }
2405 
2406   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2407   if (idx) {
2408     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2409   }
2410   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2411   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2412 
2413   for (i=0; i<A->rmap->n; i++){
2414     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2415       va[i] = vb[i];
2416       if (idx) idx[i] = a->garray[idxb[i]];
2417     }
2418   }
2419 
2420   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2421   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2422   if (idxb) {
2423     ierr = PetscFree(idxb);CHKERRQ(ierr);
2424   }
2425   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2426   PetscFunctionReturn(0);
2427 }
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2431 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2432 {
2433   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2434   PetscErrorCode ierr;
2435   PetscInt       i,*idxb = 0;
2436   PetscScalar    *va,*vb;
2437   Vec            vtmp;
2438 
2439   PetscFunctionBegin;
2440   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2441   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2442   if (idx) {
2443     for (i=0; i<A->rmap->n; i++) {
2444       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2445     }
2446   }
2447 
2448   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2449   if (idx) {
2450     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2451   }
2452   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2453   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2454 
2455   for (i=0; i<A->rmap->n; i++){
2456     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2457       va[i] = vb[i];
2458       if (idx) idx[i] = a->garray[idxb[i]];
2459     }
2460   }
2461 
2462   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2463   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2464   if (idxb) {
2465     ierr = PetscFree(idxb);CHKERRQ(ierr);
2466   }
2467   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2473 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2474 {
2475   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2476   PetscInt       n      = A->rmap->n;
2477   PetscInt       cstart = A->cmap->rstart;
2478   PetscInt      *cmap   = mat->garray;
2479   PetscInt      *diagIdx, *offdiagIdx;
2480   Vec            diagV, offdiagV;
2481   PetscScalar   *a, *diagA, *offdiagA;
2482   PetscInt       r;
2483   PetscErrorCode ierr;
2484 
2485   PetscFunctionBegin;
2486   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2487   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2488   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2489   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2490   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2491   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2492   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2493   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2494   for(r = 0; r < n; ++r) {
2495     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2496       a[r]   = diagA[r];
2497       idx[r] = cstart + diagIdx[r];
2498     } else {
2499       a[r]   = offdiagA[r];
2500       idx[r] = cmap[offdiagIdx[r]];
2501     }
2502   }
2503   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2504   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2505   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2506   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2507   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2508   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 #undef __FUNCT__
2513 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2514 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2515 {
2516   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2517   PetscInt       n      = A->rmap->n;
2518   PetscInt       cstart = A->cmap->rstart;
2519   PetscInt      *cmap   = mat->garray;
2520   PetscInt      *diagIdx, *offdiagIdx;
2521   Vec            diagV, offdiagV;
2522   PetscScalar   *a, *diagA, *offdiagA;
2523   PetscInt       r;
2524   PetscErrorCode ierr;
2525 
2526   PetscFunctionBegin;
2527   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2528   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2529   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2530   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2531   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2532   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2533   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2534   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2535   for(r = 0; r < n; ++r) {
2536     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2537       a[r]   = diagA[r];
2538       idx[r] = cstart + diagIdx[r];
2539     } else {
2540       a[r]   = offdiagA[r];
2541       idx[r] = cmap[offdiagIdx[r]];
2542     }
2543   }
2544   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2545   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2546   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2547   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2548   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2549   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2555 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2556 {
2557   PetscErrorCode ierr;
2558   Mat            *dummy;
2559 
2560   PetscFunctionBegin;
2561   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2562   *newmat = *dummy;
2563   ierr = PetscFree(dummy);CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2568 /* -------------------------------------------------------------------*/
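/*
   Function table for MATMPIAIJ. The numbered comments track the slot indices
   of struct _MatOps; a 0 entry means the operation is not implemented for
   this matrix type.
*/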
2569 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2570        MatGetRow_MPIAIJ,
2571        MatRestoreRow_MPIAIJ,
2572        MatMult_MPIAIJ,
2573 /* 4*/ MatMultAdd_MPIAIJ,
2574        MatMultTranspose_MPIAIJ,
2575        MatMultTransposeAdd_MPIAIJ,
2576 #ifdef PETSC_HAVE_PBGL
2577        MatSolve_MPIAIJ,
2578 #else
2579        0,
2580 #endif
2581        0,
2582        0,
2583 /*10*/ 0,
2584        0,
2585        0,
2586        MatRelax_MPIAIJ,
2587        MatTranspose_MPIAIJ,
2588 /*15*/ MatGetInfo_MPIAIJ,
2589        MatEqual_MPIAIJ,
2590        MatGetDiagonal_MPIAIJ,
2591        MatDiagonalScale_MPIAIJ,
2592        MatNorm_MPIAIJ,
2593 /*20*/ MatAssemblyBegin_MPIAIJ,
2594        MatAssemblyEnd_MPIAIJ,
2595        MatSetOption_MPIAIJ,
2596        MatZeroEntries_MPIAIJ,
2597 /*24*/ MatZeroRows_MPIAIJ,
2598        0,
2599 #ifdef PETSC_HAVE_PBGL
2600        0,
2601 #else
2602        0,
2603 #endif
2604        0,
2605        0,
2606 /*29*/ MatSetUpPreallocation_MPIAIJ,
2607 #ifdef PETSC_HAVE_PBGL
2608        0,
2609 #else
2610        0,
2611 #endif
2612        0,
2613        0,
2614        0,
2615 /*34*/ MatDuplicate_MPIAIJ,
2616        0,
2617        0,
2618        0,
2619        0,
2620 /*39*/ MatAXPY_MPIAIJ,
2621        MatGetSubMatrices_MPIAIJ,
2622        MatIncreaseOverlap_MPIAIJ,
2623        MatGetValues_MPIAIJ,
2624        MatCopy_MPIAIJ,
2625 /*44*/ MatGetRowMax_MPIAIJ,
2626        MatScale_MPIAIJ,
2627        0,
2628        0,
2629        0,
2630 /*49*/ MatSetBlockSize_MPIAIJ,
2631        0,
2632        0,
2633        0,
2634        0,
2635 /*54*/ MatFDColoringCreate_MPIAIJ,
2636        0,
2637        MatSetUnfactored_MPIAIJ,
2638        MatPermute_MPIAIJ,
2639        0,
2640 /*59*/ MatGetSubMatrix_MPIAIJ,
2641        MatDestroy_MPIAIJ,
2642        MatView_MPIAIJ,
2643        0,
2644        0,
2645 /*64*/ 0,
2646        0,
2647        0,
2648        0,
2649        0,
2650 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2651        MatGetRowMinAbs_MPIAIJ,
2652        0,
2653        MatSetColoring_MPIAIJ,
2654 #if defined(PETSC_HAVE_ADIC)
2655        MatSetValuesAdic_MPIAIJ,
2656 #else
2657        0,
2658 #endif
2659        MatSetValuesAdifor_MPIAIJ,
2660 /*75*/ MatFDColoringApply_AIJ,
2661        0,
2662        0,
2663        0,
2664        0,
2665 /*80*/ 0,
2666        0,
2667        0,
2668 /*83*/ MatLoad_MPIAIJ,
2669        0,
2670        0,
2671        0,
2672        0,
2673        0,
2674 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2675        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2676        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2677        MatPtAP_Basic,
2678        MatPtAPSymbolic_MPIAIJ,
2679 /*94*/ MatPtAPNumeric_MPIAIJ,
2680        0,
2681        0,
2682        0,
2683        0,
2684 /*99*/ 0,
2685        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2686        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2687        MatConjugate_MPIAIJ,
2688        0,
2689 /*104*/MatSetValuesRow_MPIAIJ,
2690        MatRealPart_MPIAIJ,
2691        MatImaginaryPart_MPIAIJ,
2692        0,
2693        0,
2694 /*109*/0,
2695        MatGetRedundantMatrix_MPIAIJ,
2696        MatGetRowMin_MPIAIJ,
2697        0,
2698        0,
2699 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2700        0,
2701        0,
2702        0,
2703        0,
2704        0
2705 };
2706 
2707 /* ----------------------------------------------------------------------------------------*/
2708 
2709 EXTERN_C_BEGIN
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2712 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2713 {
2714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2715   PetscErrorCode ierr;
2716 
2717   PetscFunctionBegin;
2718   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2719   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2720   PetscFunctionReturn(0);
2721 }
2722 EXTERN_C_END
2723 
2724 EXTERN_C_BEGIN
2725 #undef __FUNCT__
2726 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2727 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2728 {
2729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2730   PetscErrorCode ierr;
2731 
2732   PetscFunctionBegin;
2733   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2734   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2735   PetscFunctionReturn(0);
2736 }
2737 EXTERN_C_END
2738 
2739 #include "petscpc.h"
2740 EXTERN_C_BEGIN
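/*
    d_nz/d_nnz give the nonzeros per row that fall in the diagonal block
    (columns owned by this process); o_nz/o_nnz give those in the off-diagonal
    block. A minimal calling sketch through the public interface (m and the
    counts below are placeholder values):

       ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
       ierr = MatSetSizes(B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
       ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(B,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

    Supplying exact per-row counts in d_nnz/o_nnz avoids mallocs during
    assembly.
*/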
2741 #undef __FUNCT__
2742 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2743 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2744 {
2745   Mat_MPIAIJ     *b;
2746   PetscErrorCode ierr;
2747   PetscInt       i;
2748 
2749   PetscFunctionBegin;
2750   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2751   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2752   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2753   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2754 
2755   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2756   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2757   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2758   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2759   if (d_nnz) {
2760     for (i=0; i<B->rmap->n; i++) {
2761       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2762     }
2763   }
2764   if (o_nnz) {
2765     for (i=0; i<B->rmap->n; i++) {
2766       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2767     }
2768   }
2769   b = (Mat_MPIAIJ*)B->data;
2770 
2771   if (!B->preallocated) {
2772     /* Explicitly create 2 MATSEQAIJ matrices. */
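    /* b->A holds the square "diagonal" block (local rows by local columns) and
       b->B holds the "off-diagonal" block (local rows by all global columns);
       b->B is created with the full global column width here and is compacted
       to just the referenced ghost columns later, during matrix assembly. */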
2773     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2774     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2775     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2776     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2777     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2778     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2779     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2780     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2781   }
2782 
2783   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2784   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2785   B->preallocated = PETSC_TRUE;
2786   PetscFunctionReturn(0);
2787 }
2788 EXTERN_C_END
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2792 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2793 {
2794   Mat            mat;
2795   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2796   PetscErrorCode ierr;
2797 
2798   PetscFunctionBegin;
2799   *newmat       = 0;
2800   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2801   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2802   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2803   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2804   a    = (Mat_MPIAIJ*)mat->data;
2805 
2806   mat->factor       = matin->factor;
2807   mat->rmap->bs      = matin->rmap->bs;
2808   mat->assembled    = PETSC_TRUE;
2809   mat->insertmode   = NOT_SET_VALUES;
2810   mat->preallocated = PETSC_TRUE;
2811 
2812   a->size           = oldmat->size;
2813   a->rank           = oldmat->rank;
2814   a->donotstash     = oldmat->donotstash;
2815   a->roworiented    = oldmat->roworiented;
2816   a->rowindices     = 0;
2817   a->rowvalues      = 0;
2818   a->getrowactive   = PETSC_FALSE;
2819 
2820   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2821   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2822 
2823   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2824   if (oldmat->colmap) {
2825 #if defined (PETSC_USE_CTABLE)
2826     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2827 #else
2828     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2829     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2830     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2831 #endif
2832   } else a->colmap = 0;
2833   if (oldmat->garray) {
2834     PetscInt len;
2835     len  = oldmat->B->cmap->n;
2836     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2837     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2838     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2839   } else a->garray = 0;
2840 
2841   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2842   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2843   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2844   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2845   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2846   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2847   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2848   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2849   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2850   *newmat = mat;
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 #include "petscsys.h"
2855 
2856 #undef __FUNCT__
2857 #define __FUNCT__ "MatLoad_MPIAIJ"
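/*
   Loads a matrix stored in PETSc binary format (the format written by MatView()
   with a binary viewer):
       header[0] - MAT_FILE_COOKIE
       header[1] - number of global rows M
       header[2] - number of global columns N
       header[3] - total number of nonzeros
   followed by the M row lengths (PetscInt), then all column indices (PetscInt),
   then all numerical values (PetscScalar). Process 0 reads the file and ships
   each remaining process its portion.
*/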
2858 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2859 {
2860   Mat            A;
2861   PetscScalar    *vals,*svals;
2862   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2863   MPI_Status     status;
2864   PetscErrorCode ierr;
2865   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2866   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2867   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2868   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2869   PetscInt       cend,cstart,n,*rowners;
2870   int            fd;
2871 
2872   PetscFunctionBegin;
2873   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2874   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2875   if (!rank) {
2876     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2877     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2879   }
2880 
2881   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2882   M = header[1]; N = header[2];
2883   /* determine ownership of all rows */
2884   m    = M/size + ((M % size) > rank);
2885   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2886   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2887 
2888   /* First process needs enough room for process with most rows */
2889   if (!rank) {
2890     mmax       = rowners[1];
    for (i=2; i<=size; i++) {
2892       mmax = PetscMax(mmax,rowners[i]);
2893     }
2894   } else mmax = m;
2895 
2896   rowners[0] = 0;
2897   for (i=2; i<=size; i++) {
2898     rowners[i] += rowners[i-1];
2899   }
2900   rstart = rowners[rank];
2901   rend   = rowners[rank+1];
2902 
2903   /* distribute row lengths to all processors */
2904   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2905   if (!rank) {
2906     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2907     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2908     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2909     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2910     for (j=0; j<m; j++) {
2911       procsnz[0] += ourlens[j];
2912     }
2913     for (i=1; i<size; i++) {
2914       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2915       /* calculate the number of nonzeros on each processor */
2916       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2917         procsnz[i] += rowlengths[j];
2918       }
2919       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2920       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2921     }
2922     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2923   } else {
    mpicnt = PetscMPIIntCast(m);
2925     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2926   }
2927 
2928   if (!rank) {
2929     /* determine max buffer needed and allocate it */
2930     maxnz = 0;
2931     for (i=0; i<size; i++) {
2932       maxnz = PetscMax(maxnz,procsnz[i]);
2933     }
2934     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2935 
2936     /* read in my part of the matrix column indices  */
2937     nz   = procsnz[0];
2938     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2939     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2940 
    /* read in everyone else's rows and ship them off */
2942     for (i=1; i<size; i++) {
2943       nz     = procsnz[i];
2944       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2945       mpicnt = PetscMPIIntCast(nz);
2946       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2947     }
2948     ierr = PetscFree(cols);CHKERRQ(ierr);
2949   } else {
2950     /* determine buffer space needed for message */
2951     nz = 0;
2952     for (i=0; i<m; i++) {
2953       nz += ourlens[i];
2954     }
2955     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2956 
    /* receive message of column indices */
    mpicnt = PetscMPIIntCast(nz);
2959     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2960     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2961     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2962     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2963     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2964   }
2965 
2966   /* determine column ownership if matrix is not square */
2967   if (N != M) {
2968     n      = N/size + ((N % size) > rank);
2969     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2970     cstart = cend - n;
2971   } else {
2972     cstart = rstart;
2973     cend   = rend;
2974     n      = cend - cstart;
2975   }
2976 
2977   /* loop over local rows, determining number of off diagonal entries */
2978   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2979   jj = 0;
2980   for (i=0; i<m; i++) {
2981     for (j=0; j<ourlens[i]; j++) {
2982       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2983       jj++;
2984     }
2985   }
2986 
2987   /* create our matrix */
2988   for (i=0; i<m; i++) {
2989     ourlens[i] -= offlens[i];
2990   }
2991   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2992   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2993   ierr = MatSetType(A,type);CHKERRQ(ierr);
2994   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2995 
2996   for (i=0; i<m; i++) {
2997     ourlens[i] += offlens[i];
2998   }
2999 
3000   if (!rank) {
3001     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3002 
3003     /* read in my part of the matrix numerical values  */
3004     nz   = procsnz[0];
3005     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3006 
3007     /* insert into matrix */
3008     jj      = rstart;
3009     smycols = mycols;
3010     svals   = vals;
3011     for (i=0; i<m; i++) {
3012       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3013       smycols += ourlens[i];
3014       svals   += ourlens[i];
3015       jj++;
3016     }
3017 
    /* read in the other processors' parts and ship them out */
3019     for (i=1; i<size; i++) {
3020       nz     = procsnz[i];
3021       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3022       mpicnt = PetscMPIIntCast(nz);
3023       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3024     }
3025     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3026   } else {
3027     /* receive numeric values */
3028     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3029 
    /* receive message of values */
3031     mpicnt = PetscMPIIntCast(nz);
3032     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3033     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3034     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3035     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3036     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3037 
3038     /* insert into matrix */
3039     jj      = rstart;
3040     smycols = mycols;
3041     svals   = vals;
3042     for (i=0; i<m; i++) {
3043       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3044       smycols += ourlens[i];
3045       svals   += ourlens[i];
3046       jj++;
3047     }
3048   }
3049   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3050   ierr = PetscFree(vals);CHKERRQ(ierr);
3051   ierr = PetscFree(mycols);CHKERRQ(ierr);
3052   ierr = PetscFree(rowners);CHKERRQ(ierr);
3053 
3054   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3055   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3056   *newmat = A;
3057   PetscFunctionReturn(0);
3058 }
3059 
3060 #undef __FUNCT__
3061 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3062 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3063 {
3064   PetscErrorCode ierr;
3065   IS             iscol_local;
3066   PetscInt       csize;
3067 
3068   PetscFunctionBegin;
3069   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3070   if (call == MAT_REUSE_MATRIX) {
3071     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3072     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3073   } else {
3074     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3075   }
3076   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3077   if (call == MAT_INITIAL_MATRIX) {
3078     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3079     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3080   }
3081   PetscFunctionReturn(0);
3082 }
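/*
   A sketch of typical usage (illustrative only; assumes a square parallel
   matrix A and an even number of local rows): each process keeps the first
   half of its own rows and the matching columns. User code would normally go
   through the MatGetSubMatrix() interface, which dispatches to the routine
   above for MPIAIJ matrices.

     IS             isrow,iscol;
     Mat            sub;
     PetscInt       rstart,rend;
     PetscErrorCode ierr;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)A)->comm,(rend-rstart)/2,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)A)->comm,(rend-rstart)/2,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix_MPIAIJ(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/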
3083 
3084 #undef __FUNCT__
3085 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a local SeqAIJ
  copy, and then the final result obtained by concatenating the local matrices.
  Writing it directly would look much like MatGetSubMatrices_MPIAIJ().

  Note: This requires a sequential iscol that contains all the requested indices.
*/
3093 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3094 {
3095   PetscErrorCode ierr;
3096   PetscMPIInt    rank,size;
3097   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3098   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3099   Mat            *local,M,Mreuse;
3100   MatScalar      *vwork,*aa;
3101   MPI_Comm       comm = ((PetscObject)mat)->comm;
3102   Mat_SeqAIJ     *aij;
3103 
3104 
3105   PetscFunctionBegin;
3106   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3107   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3108 
3109   if (call ==  MAT_REUSE_MATRIX) {
3110     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3111     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3112     local = &Mreuse;
3113     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3114   } else {
3115     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3116     Mreuse = *local;
3117     ierr   = PetscFree(local);CHKERRQ(ierr);
3118   }
3119 
3120   /*
3121       m - number of local rows
3122       n - number of columns (same on all processors)
3123       rstart - first row in new global matrix generated
3124   */
3125   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3126   if (call == MAT_INITIAL_MATRIX) {
3127     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3128     ii  = aij->i;
3129     jj  = aij->j;
3130 
3131     /*
3132         Determine the number of non-zeros in the diagonal and off-diagonal
3133         portions of the matrix in order to do correct preallocation
3134     */
3135 
3136     /* first get start and end of "diagonal" columns */
3137     if (csize == PETSC_DECIDE) {
3138       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3139       if (mglobal == n) { /* square matrix */
3140 	nlocal = m;
3141       } else {
3142         nlocal = n/size + ((n % size) > rank);
3143       }
3144     } else {
3145       nlocal = csize;
3146     }
3147     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3148     rstart = rend - nlocal;
3149     if (rank == size - 1 && rend != n) {
3150       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3151     }
3152 
3153     /* next, compute all the lengths */
3154     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3155     olens = dlens + m;
3156     for (i=0; i<m; i++) {
3157       jend = ii[i+1] - ii[i];
3158       olen = 0;
3159       dlen = 0;
3160       for (j=0; j<jend; j++) {
3161         if (*jj < rstart || *jj >= rend) olen++;
3162         else dlen++;
3163         jj++;
3164       }
3165       olens[i] = olen;
3166       dlens[i] = dlen;
3167     }
3168     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3169     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3170     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3171     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3172     ierr = PetscFree(dlens);CHKERRQ(ierr);
3173   } else {
3174     PetscInt ml,nl;
3175 
3176     M = *newmat;
3177     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3178     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3179     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3180     /*
3181          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3182        rather than the slower MatSetValues().
3183     */
3184     M->was_assembled = PETSC_TRUE;
3185     M->assembled     = PETSC_FALSE;
3186   }
3187   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3188   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3189   ii  = aij->i;
3190   jj  = aij->j;
3191   aa  = aij->a;
3192   for (i=0; i<m; i++) {
3193     row   = rstart + i;
3194     nz    = ii[i+1] - ii[i];
3195     cwork = jj;     jj += nz;
3196     vwork = aa;     aa += nz;
3197     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3198   }
3199 
3200   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3201   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3202   *newmat = M;
3203 
  /* save the local submatrix for any subsequent reuse request */
3205   if (call ==  MAT_INITIAL_MATRIX) {
3206     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3207     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3208   }
3209 
3210   PetscFunctionReturn(0);
3211 }
3212 
3213 EXTERN_C_BEGIN
3214 #undef __FUNCT__
3215 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3216 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3217 {
3218   PetscInt       m,cstart, cend,j,nnz,i,d;
3219   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3220   const PetscInt *JJ;
3221   PetscScalar    *values;
3222   PetscErrorCode ierr;
3223 
3224   PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3226 
3227   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3228   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3229   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3230   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3231   m      = B->rmap->n;
3232   cstart = B->cmap->rstart;
3233   cend   = B->cmap->rend;
3234   rstart = B->rmap->rstart;
3235 
3236   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3237   o_nnz = d_nnz + m;
3238 
#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
    for (j=1; j<nnz; j++) {
      if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has an unsorted column index at location %D in its column indices",i,j);
    }
  }
#endif
3251 
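  /* count, for each local row, the entries that fall in the diagonal block
     (global columns in [cstart,cend)) and those that fall outside it; these
     two counts drive the preallocation below. Since the column indices of
     each row are sorted, a single forward sweep per row suffices */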
3252   for (i=0; i<m; i++) {
3253     nnz     = Ii[i+1]- Ii[i];
3254     JJ      = J + Ii[i];
3255     nnz_max = PetscMax(nnz_max,nnz);
3256     for (j=0; j<nnz; j++) {
3257       if (*JJ >= cstart) break;
3258       JJ++;
3259     }
3260     d = 0;
3261     for (; j<nnz; j++) {
3262       if (*JJ++ >= cend) break;
3263       d++;
3264     }
3265     d_nnz[i] = d;
3266     o_nnz[i] = nnz - d;
3267   }
3268   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3269   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3270 
3271   if (v) values = (PetscScalar*)v;
3272   else {
3273     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3274     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3275   }
3276 
3277   for (i=0; i<m; i++) {
3278     ii   = i + rstart;
3279     nnz  = Ii[i+1]- Ii[i];
3280     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3281   }
3282   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3283   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3284 
3285   if (!v) {
3286     ierr = PetscFree(values);CHKERRQ(ierr);
3287   }
3288   PetscFunctionReturn(0);
3289 }
3290 EXTERN_C_END
3291 
3292 #undef __FUNCT__
3293 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3294 /*@
3295    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3296    (the default parallel PETSc format).
3297 
3298    Collective on MPI_Comm
3299 
3300    Input Parameters:
3301 +  B - the matrix
3302 .  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
3304 -  v - optional values in the matrix
3305 
3306    Level: developer
3307 
3308    Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3312 
       The i and j indices are 0-based, and the entries of i are offsets into the local j array.
3314 
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:
3318 
3319         1 0 0
3320         2 0 3     P0
3321        -------
3322         4 5 6     P1
3323 
3324      Process0 [P0]: rows_owned=[0,1]
3325         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
3328 
3329      Process1 [P1]: rows_owned=[2]
3330         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3333 
3334       The column indices for each row MUST be sorted.
3335 
3336 .keywords: matrix, aij, compressed row, sparse, parallel
3337 
3338 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3339           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3340 @*/
3341 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3342 {
3343   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3344 
3345   PetscFunctionBegin;
3346   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3347   if (f) {
3348     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3349   }
3350   PetscFunctionReturn(0);
3351 }
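/*
   Example usage (a sketch, not part of the library): building the 3x3 matrix
   from the manual page above on two processes; each process passes only its
   own rows in local CSR form.

     Mat         B;
     PetscMPIInt rank;
     PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};
     PetscScalar v0[] = {1,2,3};
     PetscInt    i1[] = {0,3},j1[] = {0,1,2};
     PetscScalar v1[] = {4,5,6};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr);}
     else       {ierr = MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);CHKERRQ(ierr);}
*/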
3352 
3353 #undef __FUNCT__
3354 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3355 /*@C
3356    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3357    (the default parallel PETSc format).  For good matrix assembly performance
3358    the user should preallocate the matrix storage by setting the parameters
3359    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3360    performance can be increased by more than a factor of 50.
3361 
3362    Collective on MPI_Comm
3363 
3364    Input Parameters:
3365 +  A - the matrix
3366 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3367            (same value is used for all local rows)
3368 .  d_nnz - array containing the number of nonzeros in the various rows of the
3369            DIAGONAL portion of the local submatrix (possibly different for each row)
3370            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3371            The size of this array is equal to the number of local rows, i.e 'm'.
3372            You must leave room for the diagonal entry even if it is zero.
3373 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3374            submatrix (same value is used for all local rows).
3375 -  o_nnz - array containing the number of nonzeros in the various rows of the
3376            OFF-DIAGONAL portion of the local submatrix (possibly different for
3377            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3378            structure. The size of this array is equal to the number
3379            of local rows, i.e 'm'.
3380 
3381    If the *_nnz parameter is given then the *_nz parameter is ignored
3382 
3383    The AIJ format (also called the Yale sparse matrix format or
3384    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3385    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3386 
3387    The parallel matrix is partitioned such that the first m0 rows belong to
3388    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3390 
3391    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3397 
3398    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3399 
3400    You can call MatGetInfo() to get information on how effective the preallocation was;
3401    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3402    You can also run with the option -info and look for messages with the string
3403    malloc in them to see if additional memory allocation was needed.
3404 
3405    Example usage:
3406 
   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3409    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3410    as follows:
3411 
3412 .vb
3413             1  2  0  |  0  3  0  |  0  4
3414     Proc0   0  5  6  |  7  0  0  |  8  0
3415             9  0 10  | 11  0  0  | 12  0
3416     -------------------------------------
3417            13  0 14  | 15 16 17  |  0  0
3418     Proc1   0 18  0  | 19 20 21  |  0  0
3419             0  0  0  | 22 23  0  | 24  0
3420     -------------------------------------
3421     Proc2  25 26 27  |  0  0 28  | 29  0
3422            30  0  0  | 31 32 33  |  0 34
3423 .ve
3424 
3425    This can be represented as a collection of submatrices as:
3426 
3427 .vb
3428       A B C
3429       D E F
3430       G H I
3431 .ve
3432 
3433    Where the submatrices A,B,C are owned by proc0, D,E,F are
3434    owned by proc1, G,H,I are owned by proc2.
3435 
3436    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3437    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3438    The 'M','N' parameters are 8,8, and have the same values on all procs.
3439 
3440    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3441    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3442    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.
3446 
3447    When d_nz, o_nz parameters are specified, d_nz storage elements are
3448    allocated for every row of the local diagonal submatrix, and o_nz
3449    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3452    In this case, the values of d_nz,o_nz are:
3453 .vb
3454      proc0 : dnz = 2, o_nz = 2
3455      proc1 : dnz = 3, o_nz = 2
3456      proc2 : dnz = 1, o_nz = 4
3457 .ve
3458    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3459    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
3461    34 values.
3462 
3463    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3465    In the above case the values for d_nnz,o_nnz are:
3466 .vb
3467      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3468      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3469      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3470 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
3473 
3474    Level: intermediate
3475 
3476 .keywords: matrix, aij, compressed row, sparse, parallel
3477 
3478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3479           MPIAIJ, MatGetInfo()
3480 @*/
3481 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3482 {
3483   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3484 
3485   PetscFunctionBegin;
3486   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3487   if (f) {
3488     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3489   }
3490   PetscFunctionReturn(0);
3491 }
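/*
   Example usage (a sketch, not part of the library): the exact preallocation
   of the 8x8 example from the manual page above, as written on proc0 (3 local
   rows); the other processes pass their own d_nnz/o_nnz arrays.

     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/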
3492 
3493 #undef __FUNCT__
3494 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3495 /*@
3496      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3497          CSR format the local rows.
3498 
3499    Collective on MPI_Comm
3500 
3501    Input Parameters:
3502 +  comm - MPI communicator
3503 .  m - number of local rows (Cannot be PETSC_DECIDE)
3504 .  n - This value should be the same as the local size used in creating the
3505        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3506        calculated if N is given) For square matrices n is almost always m.
3507 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3508 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3509 .   i - row indices
3510 .   j - column indices
3511 -   a - matrix values
3512 
3513    Output Parameter:
3514 .   mat - the matrix
3515 
3516    Level: intermediate
3517 
3518    Notes:
3519        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3520      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3521      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3522 
       The i and j indices are 0-based, and the entries of i are offsets into the local j array.
3524 
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:
3528 
3529         1 0 0
3530         2 0 3     P0
3531        -------
3532         4 5 6     P1
3533 
3534      Process0 [P0]: rows_owned=[0,1]
3535         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
3538 
3539      Process1 [P1]: rows_owned=[2]
3540         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3543 
3544 .keywords: matrix, aij, compressed row, sparse, parallel
3545 
3546 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3547           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3548 @*/
3549 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3550 {
3551   PetscErrorCode ierr;
3552 
  PetscFunctionBegin;
3554   if (i[0]) {
3555     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3556   }
3557   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3558   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3559   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3560   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3561   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3562   PetscFunctionReturn(0);
3563 }
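/*
   Example usage (a sketch, not part of the library): the one-call equivalent
   of the P0/P1 example in the manual page above, as written on process 0 of
   two processes.

     Mat         mat;
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar a[] = {1,2,3};

     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&mat);CHKERRQ(ierr);
*/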
3564 
3565 #undef __FUNCT__
3566 #define __FUNCT__ "MatCreateMPIAIJ"
3567 /*@C
3568    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3569    (the default parallel PETSc format).  For good matrix assembly performance
3570    the user should preallocate the matrix storage by setting the parameters
3571    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3572    performance can be increased by more than a factor of 50.
3573 
3574    Collective on MPI_Comm
3575 
3576    Input Parameters:
3577 +  comm - MPI communicator
3578 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3579            This value should be the same as the local size used in creating the
3580            y vector for the matrix-vector product y = Ax.
3581 .  n - This value should be the same as the local size used in creating the
3582        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3583        calculated if N is given) For square matrices n is almost always m.
3584 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3585 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3586 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3587            (same value is used for all local rows)
3588 .  d_nnz - array containing the number of nonzeros in the various rows of the
3589            DIAGONAL portion of the local submatrix (possibly different for each row)
3590            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3591            The size of this array is equal to the number of local rows, i.e 'm'.
3592            You must leave room for the diagonal entry even if it is zero.
3593 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3594            submatrix (same value is used for all local rows).
3595 -  o_nnz - array containing the number of nonzeros in the various rows of the
3596            OFF-DIAGONAL portion of the local submatrix (possibly different for
3597            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3598            structure. The size of this array is equal to the number
3599            of local rows, i.e 'm'.
3600 
3601    Output Parameter:
3602 .  A - the matrix
3603 
3604    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3606    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3607 
3608    Notes:
3609    If the *_nnz parameter is given then the *_nz parameter is ignored
3610 
3611    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3612    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3613    storage requirements for this matrix.
3614 
3615    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
3617    that argument.
3618 
3619    The user MUST specify either the local or global matrix dimensions
3620    (possibly both).
3621 
3622    The parallel matrix is partitioned across processors such that the
3623    first m0 rows belong to process 0, the next m1 rows belong to
3624    process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.
3627 
3628    The columns are logically partitioned with the n0 columns belonging
3629    to 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2... are the input parameter 'n'.
3631 
3632    The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n owned
   by the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
3636    etc. The remaining portion of the local submatrix [m x (N-n)]
3637    constitute the OFF-DIAGONAL portion. The example below better
3638    illustrates this concept.
3639 
3640    For a square global matrix we define each processor's diagonal portion
3641    to be its local rows and the corresponding columns (a square submatrix);
3642    each processor's off-diagonal portion encompasses the remainder of the
3643    local matrix (a rectangular submatrix).
3644 
3645    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3646 
3647    When calling this routine with a single process communicator, a matrix of
3648    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3649    type of communicator, use the construction mechanism:
3650      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3651 
3652    By default, this format uses inodes (identical nodes) when possible.
3653    We search for consecutive rows with the same nonzero structure, thereby
3654    reusing matrix information to achieve increased efficiency.
3655 
3656    Options Database Keys:
3657 +  -mat_no_inode  - Do not use inodes
3658 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3659 -  -mat_aij_oneindex - Internally use indexing starting at 1
3660         rather than 0.  Note that when calling MatSetValues(),
3661         the user still MUST index entries starting at 0!
3662 
3663 
3664    Example usage:
3665 
   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3668    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3669    as follows:
3670 
3671 .vb
3672             1  2  0  |  0  3  0  |  0  4
3673     Proc0   0  5  6  |  7  0  0  |  8  0
3674             9  0 10  | 11  0  0  | 12  0
3675     -------------------------------------
3676            13  0 14  | 15 16 17  |  0  0
3677     Proc1   0 18  0  | 19 20 21  |  0  0
3678             0  0  0  | 22 23  0  | 24  0
3679     -------------------------------------
3680     Proc2  25 26 27  |  0  0 28  | 29  0
3681            30  0  0  | 31 32 33  |  0 34
3682 .ve
3683 
3684    This can be represented as a collection of submatrices as:
3685 
3686 .vb
3687       A B C
3688       D E F
3689       G H I
3690 .ve
3691 
3692    Where the submatrices A,B,C are owned by proc0, D,E,F are
3693    owned by proc1, G,H,I are owned by proc2.
3694 
3695    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3696    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3697    The 'M','N' parameters are 8,8, and have the same values on all procs.
3698 
3699    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3700    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3701    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.
3705 
3706    When d_nz, o_nz parameters are specified, d_nz storage elements are
3707    allocated for every row of the local diagonal submatrix, and o_nz
3708    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3711    In this case, the values of d_nz,o_nz are:
3712 .vb
3713      proc0 : dnz = 2, o_nz = 2
3714      proc1 : dnz = 3, o_nz = 2
3715      proc2 : dnz = 1, o_nz = 4
3716 .ve
3717    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3718    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
3720    34 values.
3721 
3722    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3724    In the above case the values for d_nnz,o_nnz are:
3725 .vb
3726      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3727      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3728      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3729 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
3732 
3733    Level: intermediate
3734 
3735 .keywords: matrix, aij, compressed row, sparse, parallel
3736 
3737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3738           MPIAIJ, MatCreateMPIAIJWithArrays()
3739 @*/
3740 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3741 {
3742   PetscErrorCode ierr;
3743   PetscMPIInt    size;
3744 
3745   PetscFunctionBegin;
3746   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3747   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3748   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3749   if (size > 1) {
3750     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3751     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3752   } else {
3753     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3754     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3755   }
3756   PetscFunctionReturn(0);
3757 }
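/*
   A sketch of the creation paradigm recommended in the manual page above; it
   builds the same matrix as MatCreateMPIAIJ() (sizes and preallocation
   parameters as discussed there) but lets the type be overridden from the
   options database. Whichever preallocation call matches the final type takes
   effect; the other is a no-op.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,d_nnz);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
*/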
3758 
3759 #undef __FUNCT__
3760 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3761 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3762 {
3763   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3764 
3765   PetscFunctionBegin;
3766   *Ad     = a->A;
3767   *Ao     = a->B;
3768   *colmap = a->garray;
3769   PetscFunctionReturn(0);
3770 }
3771 
3772 #undef __FUNCT__
3773 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3774 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3775 {
3776   PetscErrorCode ierr;
3777   PetscInt       i;
3778   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3779 
3780   PetscFunctionBegin;
3781   if (coloring->ctype == IS_COLORING_GLOBAL) {
3782     ISColoringValue *allcolors,*colors;
3783     ISColoring      ocoloring;
3784 
3785     /* set coloring for diagonal portion */
3786     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3787 
3788     /* set coloring for off-diagonal portion */
3789     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3790     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3791     for (i=0; i<a->B->cmap->n; i++) {
3792       colors[i] = allcolors[a->garray[i]];
3793     }
3794     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3795     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3796     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3797     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3798   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3799     ISColoringValue *colors;
3800     PetscInt        *larray;
3801     ISColoring      ocoloring;
3802 
3803     /* set coloring for diagonal portion */
3804     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3805     for (i=0; i<a->A->cmap->n; i++) {
3806       larray[i] = i + A->cmap->rstart;
3807     }
3808     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3809     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3810     for (i=0; i<a->A->cmap->n; i++) {
3811       colors[i] = coloring->colors[larray[i]];
3812     }
3813     ierr = PetscFree(larray);CHKERRQ(ierr);
3814     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3815     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3816     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3817 
3818     /* set coloring for off-diagonal portion */
3819     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3820     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3821     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3822     for (i=0; i<a->B->cmap->n; i++) {
3823       colors[i] = coloring->colors[larray[i]];
3824     }
3825     ierr = PetscFree(larray);CHKERRQ(ierr);
3826     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3827     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3828     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3829   } else {
3830     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3831   }
3832 
3833   PetscFunctionReturn(0);
3834 }
3835 
3836 #if defined(PETSC_HAVE_ADIC)
3837 #undef __FUNCT__
3838 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3839 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3840 {
3841   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3842   PetscErrorCode ierr;
3843 
3844   PetscFunctionBegin;
3845   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3846   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3847   PetscFunctionReturn(0);
3848 }
3849 #endif
3850 
3851 #undef __FUNCT__
3852 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3853 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3854 {
3855   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3856   PetscErrorCode ierr;
3857 
3858   PetscFunctionBegin;
3859   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3860   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3861   PetscFunctionReturn(0);
3862 }
3863 
3864 #undef __FUNCT__
3865 #define __FUNCT__ "MatMerge"
3866 /*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
3868                  matrices from each processor
3869 
3870     Collective on MPI_Comm
3871 
3872    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix on each process
3875 .    n - number of local columns (or PETSC_DECIDE)
3876 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3877 
3878    Output Parameter:
3879 .    outmat - the parallel matrix generated
3880 
3881     Level: advanced
3882 
3883    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3884 
3885 @*/
3886 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3887 {
3888   PetscErrorCode ierr;
3889   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3890   PetscInt       *indx;
3891   PetscScalar    *values;
3892 
3893   PetscFunctionBegin;
3894   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3895   if (scall == MAT_INITIAL_MATRIX){
3896     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3897     if (n == PETSC_DECIDE){
3898       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3899     }
3900     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3901     rstart -= m;
3902 
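    /* MatPreallocateInitialize(), MatPreallocateSet() and MatPreallocateFinalize()
       are macros: the first allocates and zeros the dnz/onz counting arrays, the
       second classifies each column index of a row as diagonal-block or
       off-diagonal-block with respect to the owning process's column range, and
       the last frees the arrays, so no explicit PetscMalloc()/PetscFree() of
       dnz/onz is needed here. */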
3903     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3904     for (i=0;i<m;i++) {
3905       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3906       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3907       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3908     }
3909     /* This routine will ONLY return MPIAIJ type matrix */
3910     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3911     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3912     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3913     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3914     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3915 
3916   } else if (scall == MAT_REUSE_MATRIX){
3917     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3918   } else {
3919     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3920   }
3921 
3922   for (i=0;i<m;i++) {
3923     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3924     Ii    = i + rstart;
3925     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3926     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3927   }
3928   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3929   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3930   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3931 
3932   PetscFunctionReturn(0);
3933 }
3934 
3935 #undef __FUNCT__
3936 #define __FUNCT__ "MatFileSplit"
3937 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3938 {
3939   PetscErrorCode    ierr;
3940   PetscMPIInt       rank;
3941   PetscInt          m,N,i,rstart,nnz;
3942   size_t            len;
3943   const PetscInt    *indx;
3944   PetscViewer       out;
3945   char              *name;
3946   Mat               B;
3947   const PetscScalar *values;
3948 
3949   PetscFunctionBegin;
3950   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3951   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3952   /* Should this be the type of the diagonal block of A? */
3953   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3954   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3955   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3956   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3957   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3958   for (i=0;i<m;i++) {
3959     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3960     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3961     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3962   }
3963   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3964   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3965 
3966   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3967   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3968   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3969   sprintf(name,"%s.%d",outfile,rank);
3970   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
3972   ierr = MatView(B,out);CHKERRQ(ierr);
3973   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3974   ierr = MatDestroy(B);CHKERRQ(ierr);
3975   PetscFunctionReturn(0);
3976 }
3977 
3978 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3979 #undef __FUNCT__
3980 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3981 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3982 {
3983   PetscErrorCode       ierr;
3984   Mat_Merge_SeqsToMPI  *merge;
3985   PetscContainer       container;
3986 
3987   PetscFunctionBegin;
3988   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3989   if (container) {
3990     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3991     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);

    ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
4007 
4008   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4009   PetscFunctionReturn(0);
4010 }
4011 
4012 #include "../src/mat/utils/freespace.h"
4013 #include "petscbt.h"
4014 
4015 #undef __FUNCT__
4016 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4017 /*@C
      MatMerge_SeqsToMPINumeric - Fills in the numerical values of a parallel MPIAIJ
                 matrix created with MatMerge_SeqsToMPISymbolic(), by adding together
                 the sequential matrices from each processor

    Collective on MPI_Comm

   Input Parameter:
.    seqmat - the input sequential matrix

   Output Parameter:
.    mpimat - the parallel matrix, created earlier with MatMerge_SeqsToMPISymbolic(),
             whose numerical values are set by this routine
4032 
4033     Level: advanced
4034 
4035    Notes:
4036      The dimensions of the sequential matrix on each processor MUST be the same.
4037      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4038      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
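
   Example usage (a minimal sketch; assumes each process has assembled a SeqAIJ
   matrix seqmat of identical global dimensions):
.vb
     Mat mpimat;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve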
4039 @*/
4040 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4041 {
4042   PetscErrorCode       ierr;
4043   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4044   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4045   PetscMPIInt          size,rank,taga,*len_s;
4046   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4047   PetscInt             proc,m;
4048   PetscInt             **buf_ri,**buf_rj;
4049   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4050   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4051   MPI_Request          *s_waits,*r_waits;
4052   MPI_Status           *status;
4053   MatScalar            *aa=a->a;
4054   MatScalar            **abuf_r,*ba_i;
4055   Mat_Merge_SeqsToMPI  *merge;
4056   PetscContainer       container;
4057 
4058   PetscFunctionBegin;
4059   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4060 
4061   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4062   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4063 
4064   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4065   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix does not have a MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
4066   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4067 
4068   bi     = merge->bi;
4069   bj     = merge->bj;
4070   buf_ri = merge->buf_ri;
4071   buf_rj = merge->buf_rj;
4072 
4073   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4074   owners = merge->rowmap.range;
4075   len_s  = merge->len_s;
4076 
4077   /* send and recv matrix values */
4078   /*-----------------------------*/
4079   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4080   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4081 
4082   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4083   for (proc=0,k=0; proc<size; proc++){
4084     if (!len_s[proc]) continue;
4085     i = owners[proc];
4086     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4087     k++;
4088   }
4089 
4090   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4091   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4092   ierr = PetscFree(status);CHKERRQ(ierr);
4093 
4094   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4095   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4096 
4097   /* insert mat values of mpimat */
4098   /*----------------------------*/
4099   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
4100   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt*),&buf_ri_k);CHKERRQ(ierr);
4101   nextrow = buf_ri_k + merge->nrecv;
4102   nextai  = nextrow + merge->nrecv;
4103 
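  /* each received i-structure is packed as [nrows; nrows local row indices;
     nrows+1 running offsets into the matching j- and a-buffers]; see the
     packing loop in MatMerge_SeqsToMPISymbolic() */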
4104   for (k=0; k<merge->nrecv; k++){
4105     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4106     nrows = *(buf_ri_k[k]);
4107     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4108     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points into the offsets section of the k-th received i-structure */
4109   }
4110 
4111   /* set values of ba */
4112   m = merge->rowmap.n;
4113   for (i=0; i<m; i++) {
4114     arow = owners[rank] + i;
4115     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4116     bnzi = bi[i+1] - bi[i];
4117     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4118 
4119     /* add local non-zero vals of this proc's seqmat into ba */
4120     anzi = ai[arow+1] - ai[arow];
4121     aj   = a->j + ai[arow];
4122     aa   = a->a + ai[arow];
4123     nextaj = 0;
4124     for (j=0; nextaj<anzi; j++){
4125       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4126         ba_i[j] += aa[nextaj++];
4127       }
4128     }
4129 
4130     /* add received vals into ba */
4131     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4132       /* i-th row */
4133       if (i == *nextrow[k]) {
4134         anzi = *(nextai[k]+1) - *nextai[k];
4135         aj   = buf_rj[k] + *(nextai[k]);
4136         aa   = abuf_r[k] + *(nextai[k]);
4137         nextaj = 0;
4138         for (j=0; nextaj<anzi; j++){
4139           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4140             ba_i[j] += aa[nextaj++];
4141           }
4142         }
4143         nextrow[k]++; nextai[k]++;
4144       }
4145     }
4146     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4147   }
4148   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4149   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4150 
4151   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4152   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4153   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4154   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4155   PetscFunctionReturn(0);
4156 }
4157 
4158 #undef __FUNCT__
4159 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4160 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4161 {
4162   PetscErrorCode       ierr;
4163   Mat                  B_mpi;
4164   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4165   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4166   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4167   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4168   PetscInt             len,proc,*dnz,*onz;
4169   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4170   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4171   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4172   MPI_Status           *status;
4173   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4174   PetscBT              lnkbt;
4175   Mat_Merge_SeqsToMPI  *merge;
4176   PetscContainer       container;
4177 
4178   PetscFunctionBegin;
4179   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4180 
4181   /* make sure it is a PETSc comm */
4182   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4183   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4184   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4185 
4186   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4187   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4188 
4189   /* determine row ownership */
4190   /*---------------------------------------------------------*/
4191   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4192   merge->rowmap.n = m;
4193   merge->rowmap.N = M;
4194   merge->rowmap.bs = 1;
4195   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4196   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4197   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4198 
4199   m      = merge->rowmap.n;
4200   M      = merge->rowmap.N;
4201   owners = merge->rowmap.range;
4202 
4203   /* determine the number of messages to send, their lengths */
4204   /*---------------------------------------------------------*/
4205   len_s  = merge->len_s;
4206 
4207   len = 0;  /* length of buf_si[] */
4208   merge->nsend = 0;
4209   for (proc=0; proc<size; proc++){
4210     len_si[proc] = 0;
4211     if (proc == rank){
4212       len_s[proc] = 0;
4213     } else {
4214       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4215       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4216     }
4217     if (len_s[proc]) {
4218       merge->nsend++;
4219       nrows = 0;
4220       for (i=owners[proc]; i<owners[proc+1]; i++){
4221         if (ai[i+1] > ai[i]) nrows++;
4222       }
4223       len_si[proc] = 2*(nrows+1);
4224       len += len_si[proc];
4225     }
4226   }
4227 
4228   /* determine the number and length of messages to receive for ij-structure */
4229   /*-------------------------------------------------------------------------*/
4230   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4231   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4232 
4233   /* post the Irecv of j-structure */
4234   /*-------------------------------*/
4235   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4236   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4237 
4238   /* post the Isend of j-structure */
4239   /*--------------------------------*/
4240   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4241   sj_waits = si_waits + merge->nsend;
4242 
4243   for (proc=0, k=0; proc<size; proc++){
4244     if (!len_s[proc]) continue;
4245     i = owners[proc];
4246     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4247     k++;
4248   }
4249 
4250   /* receives and sends of j-structure are complete */
4251   /*------------------------------------------------*/
4252   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4253   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4254 
4255   /* send and recv i-structure */
4256   /*---------------------------*/
4257   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4258   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4259 
4260   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4261   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4262   for (proc=0,k=0; proc<size; proc++){
4263     if (!len_s[proc]) continue;
4264     /* form outgoing message for i-structure:
4265          buf_si[0]:                 nrows to be sent
4266                [1:nrows]:           row index (global)
4267                [nrows+1:2*nrows+1]: i-structure index
4268     */
4269     /*-------------------------------------------*/
4270     nrows = len_si[proc]/2 - 1;
4271     buf_si_i    = buf_si + nrows+1;
4272     buf_si[0]   = nrows;
4273     buf_si_i[0] = 0;
4274     nrows = 0;
4275     for (i=owners[proc]; i<owners[proc+1]; i++){
4276       anzi = ai[i+1] - ai[i];
4277       if (anzi) {
4278         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4279         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4280         nrows++;
4281       }
4282     }
4283     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4284     k++;
4285     buf_si += len_si[proc];
4286   }
4287 
4288   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4289   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4290 
4291   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4292   for (i=0; i<merge->nrecv; i++){
4293     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4294   }
4295 
4296   ierr = PetscFree(len_si);CHKERRQ(ierr);
4297   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4298   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4299   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4300   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4301   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4302   ierr = PetscFree(status);CHKERRQ(ierr);
4303 
4304   /* compute a local seq matrix in each processor */
4305   /*----------------------------------------------*/
4306   /* allocate bi array and free space for accumulating nonzero column info */
4307   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4308   bi[0] = 0;
4309 
4310   /* create and initialize a linked list */
4311   nlnk = N+1;
4312   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4313 
4314   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4316   len  = ai[owners[rank+1]] - ai[owners[rank]];
4317   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4318   current_space = free_space;
4319 
4320   /* determine symbolic info for each local row */
4321   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt*),&buf_ri_k);CHKERRQ(ierr);
4322   nextrow = buf_ri_k + merge->nrecv;
4323   nextai  = nextrow + merge->nrecv;
4324   for (k=0; k<merge->nrecv; k++){
4325     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4326     nrows = *buf_ri_k[k];
4327     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4328     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points into the offsets section of the k-th received i-structure */
4329   }
4330 
4331   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4332   len = 0;
4333   for (i=0;i<m;i++) {
4334     bnzi   = 0;
4335     /* add local non-zero cols of this proc's seqmat into lnk */
4336     arow   = owners[rank] + i;
4337     anzi   = ai[arow+1] - ai[arow];
4338     aj     = a->j + ai[arow];
4339     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4340     bnzi += nlnk;
4341     /* add received col data into lnk */
4342     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4343       if (i == *nextrow[k]) { /* i-th row */
4344         anzi = *(nextai[k]+1) - *nextai[k];
4345         aj   = buf_rj[k] + *nextai[k];
4346         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4347         bnzi += nlnk;
4348         nextrow[k]++; nextai[k]++;
4349       }
4350     }
4351     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4352 
4353     /* if free space is not available, make more free space */
4354     if (current_space->local_remaining<bnzi) {
4355       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4356       nspacedouble++;
4357     }
4358     /* copy data into free space, then initialize lnk */
4359     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4360     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4361 
4362     current_space->array           += bnzi;
4363     current_space->local_used      += bnzi;
4364     current_space->local_remaining -= bnzi;
4365 
4366     bi[i+1] = bi[i] + bnzi;
4367   }
4368 
4369   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4370 
4371   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4372   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4373   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4374 
4375   /* create symbolic parallel matrix B_mpi */
4376   /*---------------------------------------*/
4377   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4378   if (n==PETSC_DECIDE) {
4379     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4380   } else {
4381     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4382   }
4383   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4384   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4385   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4386 
4387   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4388   B_mpi->assembled     = PETSC_FALSE;
4389   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4390   merge->bi            = bi;
4391   merge->bj            = bj;
4392   merge->buf_ri        = buf_ri;
4393   merge->buf_rj        = buf_rj;
4394   merge->coi           = PETSC_NULL;
4395   merge->coj           = PETSC_NULL;
4396   merge->owners_co     = PETSC_NULL;
4397 
4398   /* attach the supporting struct to B_mpi for reuse */
4399   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4400   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4401   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4402   *mpimat = B_mpi;
4403 
4404   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4405   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4406   PetscFunctionReturn(0);
4407 }
4408 
4409 #undef __FUNCT__
4410 #define __FUNCT__ "MatMerge_SeqsToMPI"
4411 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4412 {
4413   PetscErrorCode   ierr;
4414 
4415   PetscFunctionBegin;
4416   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4417   if (scall == MAT_INITIAL_MATRIX){
4418     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4419   }
4420   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4421   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4422   PetscFunctionReturn(0);
4423 }
4424 
4425 #undef __FUNCT__
4426 #define __FUNCT__ "MatGetLocalMat"
4427 /*@
4428      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of the parallel matrix A
4429 
4430     Not Collective
4431 
4432    Input Parameters:
4433 +    A - the matrix
4434 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4435 
4436    Output Parameter:
4437 .    A_loc - the local sequential matrix generated
4438 
4439     Level: developer
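
   Example usage (a minimal sketch; assumes A is an assembled parallel AIJ matrix):
.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc as an ordinary SeqAIJ matrix ... */
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr); /* refresh the values after A changes */
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve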
4440 
4441 @*/
4442 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4443 {
4444   PetscErrorCode  ierr;
4445   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4446   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4447   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4448   MatScalar       *aa=a->a,*ba=b->a,*cam;
4449   PetscScalar     *ca;
4450   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4451   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4452 
4453   PetscFunctionBegin;
4454   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4455   if (scall == MAT_INITIAL_MATRIX){
4456     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4457     ci[0] = 0;
4458     for (i=0; i<am; i++){
4459       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4460     }
4461     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4462     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4463     k = 0;
4464     for (i=0; i<am; i++) {
4465       ncols_o = bi[i+1] - bi[i];
4466       ncols_d = ai[i+1] - ai[i];
4467       /* off-diagonal portion of A: columns to the left of the diagonal block */
4468       for (jo=0; jo<ncols_o; jo++) {
4469         col = cmap[*bj];
4470         if (col >= cstart) break;
4471         cj[k]   = col; bj++;
4472         ca[k++] = *ba++;
4473       }
4474       /* diagonal portion of A */
4475       for (j=0; j<ncols_d; j++) {
4476         cj[k]   = cstart + *aj++;
4477         ca[k++] = *aa++;
4478       }
4479       /* off-diagonal portion of A: columns to the right of the diagonal block */
4480       for (j=jo; j<ncols_o; j++) {
4481         cj[k]   = cmap[*bj++];
4482         ca[k++] = *ba++;
4483       }
4484     }
4485     /* put together the new matrix */
4486     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4487     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4488     /* Since these are PETSc arrays, change flags to free them as necessary. */
4489     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4490     mat->free_a  = PETSC_TRUE;
4491     mat->free_ij = PETSC_TRUE;
4492     mat->nonew   = 0;
4493   } else if (scall == MAT_REUSE_MATRIX){
4494     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4495     ci = mat->i; cj = mat->j; cam = mat->a;
4496     for (i=0; i<am; i++) {
4497       /* off-diagonal portion of A: columns to the left of the diagonal block */
4498       ncols_o = bi[i+1] - bi[i];
4499       for (jo=0; jo<ncols_o; jo++) {
4500         col = cmap[*bj];
4501         if (col >= cstart) break;
4502         *cam++ = *ba++; bj++;
4503       }
4504       /* diagonal portion of A */
4505       ncols_d = ai[i+1] - ai[i];
4506       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4507       /* off-diagonal portion of A: columns to the right of the diagonal block */
4508       for (j=jo; j<ncols_o; j++) {
4509         *cam++ = *ba++; bj++;
4510       }
4511     }
4512   } else {
4513     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4514   }
4515 
4516   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4517   PetscFunctionReturn(0);
4518 }
4519 
4520 #undef __FUNCT__
4521 #define __FUNCT__ "MatGetLocalMatCondensed"
4522 /*@C
4523      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows of A and only its NON-ZERO columns
4524 
4525     Not Collective
4526 
4527    Input Parameters:
4528 +    A - the matrix
4529 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4530 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4531 
4532    Output Parameter:
4533 .    A_loc - the local sequential matrix generated
4534 
4535     Level: developer
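
   Example usage (a minimal sketch; passing PETSC_NULL for row and col selects the
   local rows and all nonzero columns):
.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     /* ... */
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve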
4536 
4537 @*/
4538 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4539 {
4540   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4541   PetscErrorCode    ierr;
4542   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4543   IS                isrowa,iscola;
4544   Mat               *aloc;
4545 
4546   PetscFunctionBegin;
4547   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4548   if (!row){
4549     start = A->rmap->rstart; end = A->rmap->rend;
4550     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4551   } else {
4552     isrowa = *row;
4553   }
4554   if (!col){
4555     start = A->cmap->rstart;
4556     cmap  = a->garray;
4557     nzA   = a->A->cmap->n;
4558     nzB   = a->B->cmap->n;
4559     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4560     ncols = 0;
4561     for (i=0; i<nzB; i++) {
4562       if (cmap[i] < start) idx[ncols++] = cmap[i];
4563       else break;
4564     }
4565     imark = i;
4566     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4567     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4568     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4569     ierr = PetscFree(idx);CHKERRQ(ierr);
4570   } else {
4571     iscola = *col;
4572   }
4573   if (scall != MAT_INITIAL_MATRIX){
4574     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4575     aloc[0] = *A_loc;
4576   }
4577   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4578   *A_loc = aloc[0];
4579   ierr = PetscFree(aloc);CHKERRQ(ierr);
4580   if (!row){
4581     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4582   }
4583   if (!col){
4584     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4585   }
4586   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4587   PetscFunctionReturn(0);
4588 }
4589 
4590 #undef __FUNCT__
4591 #define __FUNCT__ "MatGetBrowsOfAcols"
4592 /*@C
4593     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4594 
4595     Collective on Mat
4596 
4597    Input Parameters:
4598 +    A,B - the matrices in mpiaij format
4599 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4600 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4601 
4602    Output Parameter:
4603 +    rowb, colb - index sets of rows and columns of B to extract
4604 .    brstart - index of the first row of B_seq taken from B's own local rows; the next B->rmap->n rows of B_seq follow from there
4605 -    B_seq - the sequential matrix generated
4606 
4607     Level: developer
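
   Example usage (a minimal sketch; the index sets returned by the first call are
   passed back in on subsequent calls to reuse the structure):
.vb
     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     /* ... B's values change, structure unchanged ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve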
4608 
4609 @*/
4610 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4611 {
4612   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4613   PetscErrorCode    ierr;
4614   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4615   IS                isrowb,iscolb;
4616   Mat               *bseq;
4617 
4618   PetscFunctionBegin;
4619   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4620     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4621   }
4622   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4623 
4624   if (scall == MAT_INITIAL_MATRIX){
4625     start = A->cmap->rstart;
4626     cmap  = a->garray;
4627     nzA   = a->A->cmap->n;
4628     nzB   = a->B->cmap->n;
4629     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4630     ncols = 0;
4631     for (i=0; i<nzB; i++) {  /* global rows of B below this process's local row range */
4632       if (cmap[i] < start) idx[ncols++] = cmap[i];
4633       else break;
4634     }
4635     imark = i;
4636     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4637     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows of B above this process's local row range */
4638     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4639     ierr = PetscFree(idx);CHKERRQ(ierr);
4640     *brstart = imark;
4641     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4642   } else {
4643     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4644     isrowb = *rowb; iscolb = *colb;
4645     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4646     bseq[0] = *B_seq;
4647   }
4648   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4649   *B_seq = bseq[0];
4650   ierr = PetscFree(bseq);CHKERRQ(ierr);
4651   if (!rowb){
4652     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4653   } else {
4654     *rowb = isrowb;
4655   }
4656   if (!colb){
4657     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4658   } else {
4659     *colb = iscolb;
4660   }
4661   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4662   PetscFunctionReturn(0);
4663 }
4664 
4665 #undef __FUNCT__
4666 #define __FUNCT__ "MatGetBrowsOfAoCols"
4667 /*@C
4668     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4669     of the OFF-DIAGONAL portion of local A
4670 
4671     Collective on Mat
4672 
4673    Input Parameters:
4674 +    A,B - the matrices in mpiaij format
4675 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4676 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4677 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4678 
4679    Output Parameter:
4680 .    B_oth - the sequential matrix generated
4681 
4682     Level: developer
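
   Example usage (a minimal sketch; the startsj and bufa buffers returned by the
   first call are passed back in to reuse the communication pattern):
.vb
     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa    = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     /* ... B's values change, structure unchanged ... */
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
.ve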
4683 
4684 @*/
4685 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4686 {
4687   VecScatter_MPI_General *gen_to,*gen_from;
4688   PetscErrorCode         ierr;
4689   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4690   Mat_SeqAIJ             *b_oth;
4691   VecScatter             ctx=a->Mvctx;
4692   MPI_Comm               comm=((PetscObject)ctx)->comm;
4693   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4694   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4695   PetscScalar            *rvalues,*svalues;
4696   MatScalar              *b_otha,*bufa,*bufA;
4697   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4698   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4699   MPI_Status             *sstatus,rstatus;
4700   PetscMPIInt            jj;
4701   PetscInt               *cols,sbs,rbs;
4702   PetscScalar            *vals;
4703 
4704   PetscFunctionBegin;
4705   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4706     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4707   }
4708   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4709   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4710 
4711   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4712   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4713   rvalues  = gen_from->values; /* will hold the lengths of the rows being received */
4714   svalues  = gen_to->values;   /* will hold the lengths of the rows being sent */
4715   nrecvs   = gen_from->n;
4716   nsends   = gen_to->n;
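  /* the scatter's send/receive value buffers are reused below as integer workspace for the row lengths */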
4717 
4718   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4719   srow     = gen_to->indices;   /* local row index to be sent */
4720   sstarts  = gen_to->starts;
4721   sprocs   = gen_to->procs;
4722   sstatus  = gen_to->sstatus;
4723   sbs      = gen_to->bs;
4724   rstarts  = gen_from->starts;
4725   rprocs   = gen_from->procs;
4726   rbs      = gen_from->bs;
4727 
4728   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4729   if (scall == MAT_INITIAL_MATRIX){
4730     /* i-array */
4731     /*---------*/
4732     /*  post receives */
4733     for (i=0; i<nrecvs; i++){
4734       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4735       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4736       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4737     }
4738 
4739     /* pack the outgoing message */
4740     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4741     rstartsj = sstartsj + nsends +1;
4742     sstartsj[0] = 0;  rstartsj[0] = 0;
4743     len = 0; /* total length of j or a array to be sent */
4744     k = 0;
4745     for (i=0; i<nsends; i++){
4746       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4747       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4748       for (j=0; j<nrows; j++) {
4749         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4750         for (l=0; l<sbs; l++){
4751           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4752           rowlen[j*sbs+l] = ncols;
4753           len += ncols;
4754           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4755         }
4756         k++;
4757       }
4758       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4759       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4760     }
4761     /* recvs and sends of i-array are completed */
4762     i = nrecvs;
4763     while (i--) {
4764       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4765     }
4766     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4767 
4768     /* allocate buffers for sending j and a arrays */
4769     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4770     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4771 
4772     /* create i-array of B_oth */
4773     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4774     b_othi[0] = 0;
4775     len = 0; /* total length of j or a array to be received */
4776     k = 0;
4777     for (i=0; i<nrecvs; i++){
4778       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4779       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4780       for (j=0; j<nrows; j++) {
4781         b_othi[k+1] = b_othi[k] + rowlen[j];
4782         len += rowlen[j]; k++;
4783       }
4784       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4785     }
4786 
4787     /* allocate space for the j and a arrays of B_oth */
4788     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4789     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4790 
4791     /* j-array */
4792     /*---------*/
4793     /*  post receives of j-array */
4794     for (i=0; i<nrecvs; i++){
4795       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4796       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4797     }
4798 
4799     /* pack the outgoing message j-array */
4800     k = 0;
4801     for (i=0; i<nsends; i++){
4802       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4803       bufJ = bufj+sstartsj[i];
4804       for (j=0; j<nrows; j++) {
4805         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4806         for (ll=0; ll<sbs; ll++){
4807           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4808           for (l=0; l<ncols; l++){
4809             *bufJ++ = cols[l];
4810           }
4811           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4812         }
4813       }
4814       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4815     }
4816 
4817     /* recvs and sends of j-array are completed */
4818     i = nrecvs;
4819     while (i--) {
4820       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4821     }
4822     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4823   } else if (scall == MAT_REUSE_MATRIX){
4824     sstartsj = *startsj;
4825     rstartsj = sstartsj + nsends +1;
4826     bufa     = *bufa_ptr;
4827     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4828     b_otha   = b_oth->a;
4829   } else {
4830     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4831   }
4832 
4833   /* a-array */
4834   /*---------*/
4835   /*  post receives of a-array */
4836   for (i=0; i<nrecvs; i++){
4837     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4838     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4839   }
4840 
4841   /* pack the outgoing message a-array */
4842   k = 0;
4843   for (i=0; i<nsends; i++){
4844     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4845     bufA = bufa+sstartsj[i];
4846     for (j=0; j<nrows; j++) {
4847       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4848       for (ll=0; ll<sbs; ll++){
4849         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4850         for (l=0; l<ncols; l++){
4851           *bufA++ = vals[l];
4852         }
4853         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4854       }
4855     }
4856     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4857   }
4858   /* recvs and sends of a-array are completed */
4859   i = nrecvs;
4860   while (i--) {
4861     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4862   }
4863   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4864   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4865 
4866   if (scall == MAT_INITIAL_MATRIX){
4867     /* put together the new matrix */
4868     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4869 
4870     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4871     /* Since these are PETSc arrays, change flags to free them as necessary. */
4872     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4873     b_oth->free_a  = PETSC_TRUE;
4874     b_oth->free_ij = PETSC_TRUE;
4875     b_oth->nonew   = 0;
4876 
4877     ierr = PetscFree(bufj);CHKERRQ(ierr);
4878     if (!startsj || !bufa_ptr){
4879       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4880       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the local send buffer; bufa_ptr may be NULL here */
4881     } else {
4882       *startsj  = sstartsj;
4883       *bufa_ptr = bufa;
4884     }
4885   }
4886   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4887   PetscFunctionReturn(0);
4888 }
4889 
4890 #undef __FUNCT__
4891 #define __FUNCT__ "MatGetCommunicationStructs"
4892 /*@C
4893   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4894 
4895   Not Collective
4896 
4897   Input Parameters:
4898 . A - The matrix in mpiaij format
4899 
4900   Output Parameter:
4901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4902 . colmap - A map from global column index to local index into lvec
4903 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4904 
4905   Level: developer
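
  Example usage (a minimal sketch; assumes A is an assembled MPIAIJ matrix and a
  build without PETSC_USE_CTABLE, so colmap is a PetscInt array):
.vb
     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve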
4906 
4907 @*/
4908 #if defined (PETSC_USE_CTABLE)
4909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4910 #else
4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4912 #endif
4913 {
4914   Mat_MPIAIJ *a;
4915 
4916   PetscFunctionBegin;
4917   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4918   PetscValidPointer(lvec, 2);
4919   PetscValidPointer(colmap, 3);
4920   PetscValidPointer(multScatter, 4);
4921   a = (Mat_MPIAIJ *) A->data;
4922   if (lvec) *lvec = a->lvec;
4923   if (colmap) *colmap = a->colmap;
4924   if (multScatter) *multScatter = a->Mvctx;
4925   PetscFunctionReturn(0);
4926 }
4927 
4928 EXTERN_C_BEGIN
4929 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4930 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4931 EXTERN_C_END
4932 
4933 #include "../src/mat/impls/dense/mpi/mpidense.h"
4934 
4935 #undef __FUNCT__
4936 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4937 /*
4938     Computes (B'*A')' since computing B*A directly is untenable
4939 
4940                n                       p                          p
4941         (              )       (              )         (                  )
4942       m (      A       )  *  n (       B      )   =   m (         C        )
4943         (              )       (              )         (                  )
4944 
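    Note: this routine is reached via MatMatMult(A,B,scall,fill,&C) when A is
    MPIDENSE and B is MPIAIJ, through the function composed in MatCreate_MPIAIJ()
    below.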
4945 */
4946 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4947 {
4948   PetscErrorCode     ierr;
4949   Mat                At,Bt,Ct;
4950 
4951   PetscFunctionBegin;
4952   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4953   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4954   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4955   ierr = MatDestroy(At);CHKERRQ(ierr);
4956   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4957   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4958   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4959   PetscFunctionReturn(0);
4960 }
4961 
4962 #undef __FUNCT__
4963 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4964 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4965 {
4966   PetscErrorCode ierr;
4967   PetscInt       m=A->rmap->n,n=B->cmap->n;
4968   Mat            Cmat;
4969 
4970   PetscFunctionBegin;
4971   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4972   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4973   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4974   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4975   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4976   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4977   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4978   *C   = Cmat;
4979   PetscFunctionReturn(0);
4980 }
4981 
4982 /* ----------------------------------------------------------------*/
4983 #undef __FUNCT__
4984 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4985 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4986 {
4987   PetscErrorCode ierr;
4988 
4989   PetscFunctionBegin;
4990   if (scall == MAT_INITIAL_MATRIX){
4991     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4992   }
4993   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4994   PetscFunctionReturn(0);
4995 }
4996 
4997 EXTERN_C_BEGIN
4998 #if defined(PETSC_HAVE_MUMPS)
4999 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
5000 #endif
5001 #if defined(PETSC_HAVE_PASTIX)
5002 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5003 #endif
5004 #if defined(PETSC_HAVE_SUPERLU_DIST)
5005 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5006 #endif
5007 #if defined(PETSC_HAVE_SPOOLES)
5008 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5009 #endif
5010 EXTERN_C_END
5011 
5012 /*MC
5013    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5014 
5015    Options Database Keys:
5016 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5017 
5018   Level: beginner
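
  Example (a typical creation sequence, sketched; the sizes m,n,M,N and the
  preallocation counts d_nz,o_nz are application dependent):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); /* or -mat_type mpiaij with MatSetFromOptions() */
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve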
5019 
5020 .seealso: MatCreateMPIAIJ()
5021 M*/
5022 
5023 EXTERN_C_BEGIN
5024 #undef __FUNCT__
5025 #define __FUNCT__ "MatCreate_MPIAIJ"
5026 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5027 {
5028   Mat_MPIAIJ     *b;
5029   PetscErrorCode ierr;
5030   PetscMPIInt    size;
5031 
5032   PetscFunctionBegin;
5033   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5034 
5035   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5036   B->data         = (void*)b;
5037   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5038   B->rmap->bs      = 1;
5039   B->assembled    = PETSC_FALSE;
5040   B->mapping      = 0;
5041 
5042   B->insertmode      = NOT_SET_VALUES;
5043   b->size            = size;
5044   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5045 
5046   /* build cache for off array entries formed */
5047   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5048   b->donotstash  = PETSC_FALSE;
5049   b->colmap      = 0;
5050   b->garray      = 0;
5051   b->roworiented = PETSC_TRUE;
5052 
5053   /* stuff used for matrix vector multiply */
5054   b->lvec      = PETSC_NULL;
5055   b->Mvctx     = PETSC_NULL;
5056 
5057   /* stuff for MatGetRow() */
5058   b->rowindices   = 0;
5059   b->rowvalues    = 0;
5060   b->getrowactive = PETSC_FALSE;
5061 
5062 #if defined(PETSC_HAVE_SPOOLES)
5063   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
5064                                      "MatGetFactor_mpiaij_spooles",
5065                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5066 #endif
5067 #if defined(PETSC_HAVE_MUMPS)
5068   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
5069                                      "MatGetFactor_mpiaij_mumps",
5070                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5071 #endif
5072 #if defined(PETSC_HAVE_PASTIX)
5073   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
5074 					   "MatGetFactor_mpiaij_pastix",
5075 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5076 #endif
5077 #if defined(PETSC_HAVE_SUPERLU_DIST)
5078   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
5079                                      "MatGetFactor_mpiaij_superlu_dist",
5080                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5081 #endif
5082   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5083                                      "MatStoreValues_MPIAIJ",
5084                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5085   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5086                                      "MatRetrieveValues_MPIAIJ",
5087                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5088   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5089 				     "MatGetDiagonalBlock_MPIAIJ",
5090                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5091   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5092 				     "MatIsTranspose_MPIAIJ",
5093 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5094   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5095 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5096 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5097   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5098 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5099 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5100   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5101 				     "MatDiagonalScaleLocal_MPIAIJ",
5102 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5103   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5104                                      "MatConvert_MPIAIJ_MPICSRPERM",
5105                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5106   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5107                                      "MatConvert_MPIAIJ_MPICRL",
5108                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5110                                      "MatMatMult_MPIDense_MPIAIJ",
5111                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5112   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5113                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5114                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5115   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5116                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5117                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5118   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5119   PetscFunctionReturn(0);
5120 }
5121 EXTERN_C_END
5122 
5123 #undef __FUNCT__
5124 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5125 /*@
5126      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5127          and "off-diagonal" part of the matrix in CSR format.
5128 
5129    Collective on MPI_Comm
5130 
5131    Input Parameters:
5132 +  comm - MPI communicator
5133 .  m - number of local rows (Cannot be PETSC_DECIDE)
5134 .  n - This value should be the same as the local size used in creating the
5135        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5136        it calculated if N is given). For square matrices n is almost always m.
5137 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5138 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5139 .   i - row indices for "diagonal" portion of matrix
5140 .   j - column indices for "diagonal" portion of matrix
5141 .   a - matrix values
5142 .   oi - row indices for "off-diagonal" portion of matrix
5143 .   oj - column indices for "off-diagonal" portion of matrix
5144 -   oa - matrix values
5145 
5146    Output Parameter:
5147 .   mat - the matrix
5148 
5149    Level: advanced
5150 
5151    Notes:
5152        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5153 
5154        The i and j indices are 0 based
5155 
5156        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5157 
5158        This sets local rows and cannot be used to set off-processor values.
5159 
5160        You cannot later use MatSetValues() to change values in this matrix.
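
   Example usage (a schematic sketch for a single local row; assumes comm, n, and
   N are defined; j holds local column indices of the diagonal block while oj
   holds global column indices):
.vb
     Mat         A;
     PetscInt    i[]  = {0,1},j[]  = {0};
     PetscScalar a[]  = {2.0};
     PetscInt    oi[] = {0,1},oj[] = {0}; /* schematic: a global column owned by another process */
     PetscScalar oa[] = {-1.0};
     ierr = MatCreateMPIAIJWithSplitArrays(comm,1,n,PETSC_DETERMINE,N,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve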
5161 
5162 .keywords: matrix, aij, compressed row, sparse, parallel
5163 
5164 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5165           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5166 @*/
5167 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5168 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5169 {
5170   PetscErrorCode ierr;
5171   Mat_MPIAIJ     *maij;
5172 
5173   PetscFunctionBegin;
5174   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5175   if (i[0]) {
5176     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5177   }
5178   if (oi[0]) {
5179     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5180   }
5181   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5182   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5183   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5184   maij = (Mat_MPIAIJ*) (*mat)->data;
5185   maij->donotstash     = PETSC_TRUE;
5186   (*mat)->preallocated = PETSC_TRUE;
5187 
5188   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5189   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5190   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5191   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5192 
5193   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5194   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5195 
5196   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5197   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5198   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5199   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5200 
5201   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5202   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5203   PetscFunctionReturn(0);
5204 }
5205 
5206 /*
5207     Special version for direct calls from Fortran
5208 */
5209 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5210 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5211 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5212 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5213 #endif
5214 
5215 /* Change these macros so can be used in void function */
5216 #undef CHKERRQ
5217 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5218 #undef SETERRQ2
5219 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5220 #undef SETERRQ
5221 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5222 
5223 EXTERN_C_BEGIN
5224 #undef __FUNCT__
5225 #define __FUNCT__ "matsetvaluesmpiaij_"
5226 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5227 {
5228   Mat             mat = *mmat;
5229   PetscInt        m = *mm, n = *mn;
5230   InsertMode      addv = *maddv;
5231   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5232   PetscScalar     value;
5233   PetscErrorCode  ierr;
5234 
5235   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5236   if (mat->insertmode == NOT_SET_VALUES) {
5237     mat->insertmode = addv;
5238   }
5239 #if defined(PETSC_USE_DEBUG)
5240   else if (mat->insertmode != addv) {
5241     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5242   }
5243 #endif
5244   {
5245   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5246   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5247   PetscTruth      roworiented = aij->roworiented;
5248 
5249   /* Some Variables required in the macro */
5250   Mat             A = aij->A;
5251   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5252   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5253   MatScalar       *aa = a->a;
5254   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5255   Mat             B = aij->B;
5256   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5257   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5258   MatScalar       *ba = b->a;
5259 
5260   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5261   PetscInt        nonew = a->nonew;
5262   MatScalar       *ap1,*ap2;
5263 
5264   PetscFunctionBegin;
5265   for (i=0; i<m; i++) {
5266     if (im[i] < 0) continue;
5267 #if defined(PETSC_USE_DEBUG)
5268     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5269 #endif
5270     if (im[i] >= rstart && im[i] < rend) {
5271       row      = im[i] - rstart;
5272       lastcol1 = -1;
5273       rp1      = aj + ai[row];
5274       ap1      = aa + ai[row];
5275       rmax1    = aimax[row];
5276       nrow1    = ailen[row];
5277       low1     = 0;
5278       high1    = nrow1;
5279       lastcol2 = -1;
5280       rp2      = bj + bi[row];
5281       ap2      = ba + bi[row];
5282       rmax2    = bimax[row];
5283       nrow2    = bilen[row];
5284       low2     = 0;
5285       high2    = nrow2;
5286 
5287       for (j=0; j<n; j++) {
5288         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5289         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5290         if (in[j] >= cstart && in[j] < cend){
5291           col = in[j] - cstart;
5292           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5293         } else if (in[j] < 0) continue;
5294 #if defined(PETSC_USE_DEBUG)
5295         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5296 #endif
5297         else {
5298           if (mat->was_assembled) {
5299             if (!aij->colmap) {
5300               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5301             }
5302 #if defined (PETSC_USE_CTABLE)
5303             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5304 	    col--;
5305 #else
5306             col = aij->colmap[in[j]] - 1;
5307 #endif
5308             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5309               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5310               col =  in[j];
5311               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5312               B = aij->B;
5313               b = (Mat_SeqAIJ*)B->data;
5314               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5315               rp2      = bj + bi[row];
5316               ap2      = ba + bi[row];
5317               rmax2    = bimax[row];
5318               nrow2    = bilen[row];
5319               low2     = 0;
5320               high2    = nrow2;
5321               bm       = aij->B->rmap->n;
5322               ba = b->a;
5323             }
5324           } else col = in[j];
5325           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5326         }
5327       }
5328     } else {
5329       if (!aij->donotstash) {
5330         if (roworiented) {
5331           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5332         } else {
5333           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5334         }
5335       }
5336     }
5337   }}
5338   PetscFunctionReturnVoid();
5339 }
5340 EXTERN_C_END
5341 
5342