xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c78e9cdceff604ba5b6245eccf21eb181726cb11)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the diagonal and off-diagonal entry counts for each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the diagonal and off-diagonal entry counts for each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap.n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap.n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap.n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
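/*
   Editor's note: a hedged usage sketch of the routine above (not from the
   source; seq, par, and mlocal are hypothetical names). gmat is significant
   only on rank 0, and m is the number of local rows this process is to own:

     Mat seq,par;
     ... rank 0 assembles seq as a MATSEQAIJ holding the entire matrix ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,seq,mlocal,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
     ... later, to refresh only the numerical values in the same layout ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,seq,mlocal,MAT_REUSE_MATRIX,&par);CHKERRQ(ierr);
*/
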
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash-table lookup cost; without it, it is not scalable (each
181 process stores an order-N integer array that is fast to access but costly in memory).
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap.n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
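/*
   Editor's illustrative sketch (hypothetical helper, not called anywhere in
   PETSc; error checking elided for brevity): how the colmap built above is
   consulted to translate a global column index into a local index in the
   off-diagonal block. The two branches mirror the lookups in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ().
*/
static PetscInt ColmapLookup_Sketch(Mat_MPIAIJ *aij,PetscInt gcol)
{
  PetscInt lcol;
#if defined (PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,&lcol); /* entries are stored shifted by one so 0 can mean "absent" */
  lcol--;
#else
  lcol = aij->colmap[gcol] - 1;             /* dense array of length cmap.N; 0 means "absent" */
#endif
  return lcol;                              /* -1 when gcol does not appear in the off-diagonal block */
}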
206 
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
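/*
   Editor's illustrative sketch of the search-and-shift scheme both macros
   above implement; a sketch under the assumptions that cols[] is sorted
   ascending and the row has spare capacity, so the reallocation path
   (MatSeqXAIJReallocateAIJ) is elided. Names are hypothetical.
*/
static void InsertSorted_Sketch(PetscInt *cols,MatScalar *vals,PetscInt *nrow,PetscInt col,MatScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i;
  while (high-low > 5) {            /* binary search narrows the range to a short run */
    t = (low+high)/2;
    if (cols[t] > col) high = t;
    else               low  = t;
  }
  for (i=low; i<high; i++) {        /* linear scan of the remaining run */
    if (cols[i] > col) break;
    if (cols[i] == col) {           /* entry already exists: add to it or overwrite it */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i] = value;
      return;
    }
  }
  for (t=*nrow-1; t>=i; t--) {      /* shift all later entries in the row up one slot */
    cols[t+1] = cols[t];
    vals[t+1] = vals[t];
  }
  cols[i] = col;                    /* insert the new entry in sorted position */
  vals[i] = value;
  (*nrow)++;
}
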
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
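/*
   Editor's note on the layout assumed by MatSetValuesRow_MPIAIJ() above: the
   caller's array v[] holds one full local row with its columns in ascending
   global order, which the routine splits into three contiguous pieces,

      [ B part left of the diagonal block | A (diagonal) part | B part right of it ]

   so three PetscMemcpy() calls suffice and no per-entry searching is needed.
*/
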
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
311   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap.n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
415   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col--;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processes use the same InsertMode, either INSERT_VALUES or ADD_VALUES */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
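/*
   Editor's illustrative sketch (hypothetical helper, not PETSc API) of the
   consistency check used above: because INSERT_VALUES and ADD_VALUES occupy
   different bits, a bitwise-OR reduction equals their union exactly when some
   process inserted while another added:
*/
static PetscErrorCode CheckInsertMode_Sketch(MPI_Comm comm,InsertMode local)
{
  InsertMode     global;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Allreduce(&local,&global,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (global == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  PetscFunctionReturn(0);
}
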
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive values belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled, if so we must
517      also disassemble ourselves, in order that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap.range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processes of the number of messages and the max length */
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal allocating
674      new memory, thus calling lots of mallocs and slowing things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag != 0.0) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
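/*
   Editor's illustrative sketch (hypothetical helper) of the owner search
   performed at the top of MatZeroRows_MPIAIJ() above; the real loop keeps j
   across iterations, so an already-sorted rows[] list is scanned in a single
   pass while arbitrary input merely restarts the scan:
*/
static PetscMPIInt FindRowOwner_Sketch(const PetscInt *owners,PetscMPIInt size,PetscInt idx)
{
  PetscMPIInt j;
  for (j=0; j<size; j++) {
    if (idx >= owners[j] && idx < owners[j+1]) return j; /* owners[] holds the ownership range offsets */
  }
  return -1; /* idx is out of range */
}
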
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap.n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
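/*
   Editor's note: MatMult_MPIAIJ() above computes y = A_d x + A_o x_ghost,
   where A_d = a->A is the diagonal block and A_o = a->B the off-diagonal
   block. The scatter of ghost values is started before the local product and
   finished after it, so communication is hidden behind local work:

     VecScatterBegin(...);    start filling a->lvec with off-process entries of x
     y = A_d * x;             local multiply overlaps the communication
     VecScatterEnd(...);      wait for the ghost values to arrive
     y += A_o * lvec;         apply the off-diagonal coupling
*/
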
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts: note this assumes the values are not actually */
764     /* added into yy until the VecScatterEnd() below */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* the values were actually received in the Begin(), but we still must call this (a no-op here) */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap.N;
932     header[2] = mat->cmap.N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap.n;
938     range = mat->rmap.range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap.n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap.n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
968   nzmax = nz; /* 0th process needs as much space as the largest process needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap.n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap.n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
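/*
   Editor's note on the layout MatView_MPIAIJ_Binary() above writes (the
   standard PETSc binary matrix format):

     PetscInt    header[4];        MAT_FILE_COOKIE, M, N, total number of nonzeros
     PetscInt    row_lengths[M];   nonzeros per global row
     PetscInt    col_indices[nz];  global column indices, ascending within each row
     PetscScalar values[nz];       numerical values in the same order

   A hedged reading sketch with the matching PetscBinaryRead() calls (file
   opening and buffer allocation elided):

     PetscInt hdr[4];
     ierr = PetscBinaryRead(fd,hdr,4,PETSC_INT);CHKERRQ(ierr);
     ierr = PetscBinaryRead(fd,rowlens,hdr[1],PETSC_INT);CHKERRQ(ierr);
     ierr = PetscBinaryRead(fd,cols,hdr[3],PETSC_INT);CHKERRQ(ierr);
     ierr = PetscBinaryRead(fd,vals,hdr[3],PETSC_SCALAR);CHKERRQ(ierr);
*/
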
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");
1114 
1115     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1116     if (!rank) {
1117       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1118     } else {
1119       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1120     }
1121     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1122     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1123     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1124     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1125 
1126     /* copy over the A part */
1127     Aloc = (Mat_SeqAIJ*)aij->A->data;
1128     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1129     row = mat->rmap.rstart;
1130     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
1131     for (i=0; i<m; i++) {
1132       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1133       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1134     }
1135     aj = Aloc->j;
1136     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
1137 
1138     /* copy over the B part */
1139     Aloc = (Mat_SeqAIJ*)aij->B->data;
1140     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1141     row  = mat->rmap.rstart;
1142     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1143     ct   = cols;
1144     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1148     }
1149     ierr = PetscFree(ct);CHKERRQ(ierr);
1150     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1151     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1152     /*
1153        Everyone has to call MatView() to draw the matrix since the graphics waits are
1154        synchronized across all processors that share the PetscDraw object
1155     */
1156     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1157     if (!rank) {
1158       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1159       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1160     }
1161     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1162     ierr = MatDestroy(A);CHKERRQ(ierr);
1163   }
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatView_MPIAIJ"
1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1170 {
1171   PetscErrorCode ierr;
1172   PetscTruth     iascii,isdraw,issocket,isbinary;
1173 
1174   PetscFunctionBegin;
1175   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1176   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1177   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1178   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1179   if (iascii || isdraw || isbinary || issocket) {
1180     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1181   } else {
1182     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatRelax_MPIAIJ"
1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1190 {
1191   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1192   PetscErrorCode ierr;
1193   Vec            bb1;
1194 
1195   PetscFunctionBegin;
1196   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1197 
1198   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1199     if (flag & SOR_ZERO_INITIAL_GUESS) {
1200       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1201       its--;
1202     }
1203 
1204     while (its--) {
1205       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1207 
1208       /* update rhs: bb1 = bb - B*x */
1209       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1210       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1211 
1212       /* local sweep */
1213       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1214     }
1215   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1216     if (flag & SOR_ZERO_INITIAL_GUESS) {
1217       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1218       its--;
1219     }
1220     while (its--) {
1221       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1222       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223 
1224       /* update rhs: bb1 = bb - B*x */
1225       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1226       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1227 
1228       /* local sweep */
1229       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1230     }
1231   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1232     if (flag & SOR_ZERO_INITIAL_GUESS) {
1233       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1234       its--;
1235     }
1236     while (its--) {
1237       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239 
1240       /* update rhs: bb1 = bb - B*x */
1241       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1242       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1243 
1244       /* local sweep */
1245       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1246     }
1247   } else {
1248     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1249   }
1250 
1251   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
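/*
   Editor's note: each outer iteration of MatRelax_MPIAIJ() above is a
   block-Jacobi step whose subdomain solve is a local SOR sweep,

      bb1 = bb - A_o x_ghost         (off-process coupling moved to the right-hand side)
      x   = SOR(A_d, bb1, omega)     (purely local sweep on the diagonal block)

   so the SOR_LOCAL_* flags converge like block Jacobi across processes while
   behaving like classical SOR within each process.
*/
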
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatPermute_MPIAIJ"
1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1258 {
1259   MPI_Comm       comm,pcomm;
1260   PetscInt       first,local_size,nrows,*rows;
1261   int            ntids;
1262   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1267   /* make a collective version of 'rowp' */
1268   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1269   if (pcomm==comm) {
1270     crowp = rowp;
1271   } else {
1272     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1273     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1274     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1275     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1276   }
1277   /* collect the global row permutation and invert it */
1278   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1279   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1280   if (pcomm!=comm) {
1281     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1282   }
1283   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1284   /* get the local target indices */
1285   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1286   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1287   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1288   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1289   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1290   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1291   /* the column permutation is so much easier;
1292      make a local version of 'colp' and invert it */
1293   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1294   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1295   if (ntids==1) {
1296     lcolp = colp;
1297   } else {
1298     /* ISGetIndices() returns only this process's part of a parallel IS, so
1299        gather the whole column permutation onto each process */
1300     ierr = ISAllGather(colp,&lcolp); CHKERRQ(ierr);
1301   }
1302   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); /* must be flagged before inversion */
1303   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1304   if (ntids>1) {
1305     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1306   }
1308   /* now we just get the submatrix */
1309   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1310   /* clean up */
1311   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1312   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
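
/*
   A usage sketch (hypothetical caller; the calling convention below is inferred from the
   code above): each process supplies the new positions of its own rows, while the column
   permutation is given in full on every process,

     IS       rowp,colp;
     Mat      B;
     PetscInt rstart,rend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&rowp);CHKERRQ(ierr); identity rows
     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&colp);CHKERRQ(ierr);                N = global columns
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/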
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1318 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1319 {
1320   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1321   Mat            A = mat->A,B = mat->B;
1322   PetscErrorCode ierr;
1323   PetscReal      isend[5],irecv[5];
1324 
1325   PetscFunctionBegin;
1326   info->block_size     = 1.0;
1327   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1328   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1329   isend[3] = info->memory;  isend[4] = info->mallocs;
1330   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1331   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1332   isend[3] += info->memory;  isend[4] += info->mallocs;
1333   if (flag == MAT_LOCAL) {
1334     info->nz_used      = isend[0];
1335     info->nz_allocated = isend[1];
1336     info->nz_unneeded  = isend[2];
1337     info->memory       = isend[3];
1338     info->mallocs      = isend[4];
1339   } else if (flag == MAT_GLOBAL_MAX) {
1340     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1341     info->nz_used      = irecv[0];
1342     info->nz_allocated = irecv[1];
1343     info->nz_unneeded  = irecv[2];
1344     info->memory       = irecv[3];
1345     info->mallocs      = irecv[4];
1346   } else if (flag == MAT_GLOBAL_SUM) {
1347     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1348     info->nz_used      = irecv[0];
1349     info->nz_allocated = irecv[1];
1350     info->nz_unneeded  = irecv[2];
1351     info->memory       = irecv[3];
1352     info->mallocs      = irecv[4];
1353   }
1354   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1355   info->fill_ratio_needed = 0;
1356   info->factor_mallocs    = 0;
1357   info->rows_global       = (double)matin->rmap.N;
1358   info->columns_global    = (double)matin->cmap.N;
1359   info->rows_local        = (double)matin->rmap.n;
1360   info->columns_local     = (double)matin->cmap.n;
1361 
1362   PetscFunctionReturn(0);
1363 }
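
/*
   A usage sketch (hypothetical caller): the flag selects how the per-process numbers
   gathered above are combined,

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g\n",
                        info.nz_used,info.nz_allocated);CHKERRQ(ierr);

   MAT_LOCAL reports this process's diagonal plus off-diagonal blocks only, and
   MAT_GLOBAL_MAX reduces with MPI_MAX instead of MPI_SUM.
*/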
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetOption_MPIAIJ"
1367 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1368 {
1369   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   switch (op) {
1374   case MAT_NEW_NONZERO_LOCATIONS:
1375   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1376   case MAT_KEEP_ZEROED_ROWS:
1377   case MAT_NEW_NONZERO_LOCATION_ERR:
1378   case MAT_USE_INODES:
1379   case MAT_IGNORE_ZERO_ENTRIES:
1380     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1381     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1382     break;
1383   case MAT_ROW_ORIENTED:
1384     a->roworiented = flg;
1385     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1386     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1387     break;
1388   case MAT_NEW_DIAGONALS:
1389     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1390     break;
1391   case MAT_IGNORE_OFF_PROC_ENTRIES:
1392     a->donotstash = PETSC_TRUE;
1393     break;
1394   case MAT_SYMMETRIC:
1395     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1396     break;
1397   case MAT_STRUCTURALLY_SYMMETRIC:
1398   case MAT_HERMITIAN:
1399   case MAT_SYMMETRY_ETERNAL:
1400     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1401     break;
1402   default:
1403     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatGetRow_MPIAIJ"
1410 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1411 {
1412   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1413   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1414   PetscErrorCode ierr;
1415   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1416   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1417   PetscInt       *cmap,*idx_p;
1418 
1419   PetscFunctionBegin;
1420   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1421   mat->getrowactive = PETSC_TRUE;
1422 
1423   if (!mat->rowvalues && (idx || v)) {
1424     /*
1425         allocate enough space to hold information from the longest row.
1426     */
1427     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1428     PetscInt     max = 1,tmp;
1429     for (i=0; i<matin->rmap.n; i++) {
1430       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1431       if (max < tmp) { max = tmp; }
1432     }
1433     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1434     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1435   }
1436 
1437   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1438   lrow = row - rstart;
1439 
1440   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1441   if (!v)   {pvA = 0; pvB = 0;}
1442   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1443   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1444   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1445   nztot = nzA + nzB;
1446 
1447   cmap  = mat->garray;
1448   if (v  || idx) {
1449     if (nztot) {
1450       /* Sort by increasing column numbers, assuming A and B already sorted */
1451       PetscInt imark = -1;
1452       if (v) {
1453         *v = v_p = mat->rowvalues;
1454         for (i=0; i<nzB; i++) {
1455           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1456           else break;
1457         }
1458         imark = i;
1459         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1460         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1461       }
1462       if (idx) {
1463         *idx = idx_p = mat->rowindices;
1464         if (imark > -1) {
1465           for (i=0; i<imark; i++) {
1466             idx_p[i] = cmap[cworkB[i]];
1467           }
1468         } else {
1469           for (i=0; i<nzB; i++) {
1470             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1471             else break;
1472           }
1473           imark = i;
1474         }
1475         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1476         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1477       }
1478     } else {
1479       if (idx) *idx = 0;
1480       if (v)   *v   = 0;
1481     }
1482   }
1483   *nz = nztot;
1484   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1485   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 #undef __FUNCT__
1490 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1491 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1492 {
1493   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1494 
1495   PetscFunctionBegin;
1496   if (!aij->getrowactive) {
1497     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1498   }
1499   aij->getrowactive = PETSC_FALSE;
1500   PetscFunctionReturn(0);
1501 }
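
/*
   A usage sketch (hypothetical caller): the calls must be paired, one row at a time, and
   only rows owned by this process may be requested,

     PetscInt    row,ncols,rstart,rend;
     PetscInt    *cols;
     PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       read cols[0..ncols-1] and vals[0..ncols-1]; do not modify or free them
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/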
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatNorm_MPIAIJ"
1505 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1506 {
1507   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1508   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1509   PetscErrorCode ierr;
1510   PetscInt       i,j,cstart = mat->cmap.rstart;
1511   PetscReal      sum = 0.0;
1512   MatScalar      *v;
1513 
1514   PetscFunctionBegin;
1515   if (aij->size == 1) {
1516     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1517   } else {
1518     if (type == NORM_FROBENIUS) {
1519       v = amat->a;
1520       for (i=0; i<amat->nz; i++) {
1521 #if defined(PETSC_USE_COMPLEX)
1522         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1523 #else
1524         sum += (*v)*(*v); v++;
1525 #endif
1526       }
1527       v = bmat->a;
1528       for (i=0; i<bmat->nz; i++) {
1529 #if defined(PETSC_USE_COMPLEX)
1530         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1531 #else
1532         sum += (*v)*(*v); v++;
1533 #endif
1534       }
1535       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1536       *norm = sqrt(*norm);
1537     } else if (type == NORM_1) { /* max column norm */
1538       PetscReal *tmp,*tmp2;
1539       PetscInt  *jj,*garray = aij->garray;
1540       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1541       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1542       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1543       *norm = 0.0;
1544       v = amat->a; jj = amat->j;
1545       for (j=0; j<amat->nz; j++) {
1546         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1547       }
1548       v = bmat->a; jj = bmat->j;
1549       for (j=0; j<bmat->nz; j++) {
1550         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1551       }
1552       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1553       for (j=0; j<mat->cmap.N; j++) {
1554         if (tmp2[j] > *norm) *norm = tmp2[j];
1555       }
1556       ierr = PetscFree(tmp);CHKERRQ(ierr);
1557       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1558     } else if (type == NORM_INFINITY) { /* max row norm */
1559       PetscReal ntemp = 0.0;
1560       for (j=0; j<aij->A->rmap.n; j++) {
1561         v = amat->a + amat->i[j];
1562         sum = 0.0;
1563         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1564           sum += PetscAbsScalar(*v); v++;
1565         }
1566         v = bmat->a + bmat->i[j];
1567         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1568           sum += PetscAbsScalar(*v); v++;
1569         }
1570         if (sum > ntemp) ntemp = sum;
1571       }
1572       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1573     } else {
1574       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1575     }
1576   }
1577   PetscFunctionReturn(0);
1578 }
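
/*
   For reference, the norms computed above are the standard ones:

     NORM_FROBENIUS:  ||A||_F   = sqrt(sum_ij |a_ij|^2)   (local partial sums, MPI_SUM)
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|      (column sums, MPI_SUM, then max)
     NORM_INFINITY:   ||A||_oo  = max_i sum_j |a_ij|      (row sums, then MPI_MAX)

   The 2-norm would require an eigenvalue computation and is not supported.
*/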
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatTranspose_MPIAIJ"
1582 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1583 {
1584   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1585   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1586   PetscErrorCode ierr;
1587   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1588   PetscInt       cstart=A->cmap.rstart,ncol;
1589   Mat            B;
1590   MatScalar      *array;
1591 
1592   PetscFunctionBegin;
1593   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1594 
1595   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1596   ai = Aloc->i; aj = Aloc->j;
1597   bi = Bloc->i; bj = Bloc->j;
1598   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1599     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1600     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1601     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1602     for (i=0; i<ai[ma]; i++){
1603       d_nnz[aj[i]] ++;
1604       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1605     }
1606 
1607     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1608     ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1609     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1610     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1611     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1612   } else {
1613     B = *matout;
1614   }
1615 
1616   /* copy over the A part */
1617   array = Aloc->a;
1618   row = A->rmap.rstart;
1619   for (i=0; i<ma; i++) {
1620     ncol = ai[i+1]-ai[i];
1621     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1622     row++; array += ncol; aj += ncol;
1623   }
1624   aj = Aloc->j;
1625   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1626 
1627   /* copy over the B part */
1628   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1629   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1630   array = Bloc->a;
1631   row = A->rmap.rstart;
1632   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1633   cols_tmp = cols;
1634   for (i=0; i<mb; i++) {
1635     ncol = bi[i+1]-bi[i];
1636     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1637     row++; array += ncol; cols_tmp += ncol;
1638   }
1639   ierr = PetscFree(cols);CHKERRQ(ierr);
1640 
1641   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1642   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1644     *matout = B;
1645   } else {
1646     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1647   }
1648   PetscFunctionReturn(0);
1649 }
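
/*
   A usage sketch (hypothetical caller, assuming the MatReuse-taking interface this
   routine implements):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);  new matrix
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);     in-place; square matrices only
*/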
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1653 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1654 {
1655   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1656   Mat            a = aij->A,b = aij->B;
1657   PetscErrorCode ierr;
1658   PetscInt       s1,s2,s3;
1659 
1660   PetscFunctionBegin;
1661   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1662   if (rr) {
1663     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1664     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1665     /* Overlap communication with computation. */
1666     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667   }
1668   if (ll) {
1669     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1670     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1671     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1672   }
1673   /* scale  the diagonal block */
1674   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1675 
1676   if (rr) {
1677     /* Do a scatter end and then right scale the off-diagonal block */
1678     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1680   }
1681 
1682   PetscFunctionReturn(0);
1683 }
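
/*
   A usage sketch (hypothetical caller): MatDiagonalScale(mat,l,r) forms diag(l)*A*diag(r);
   either vector may be PETSC_NULL to scale on one side only,

     ierr = MatDiagonalScale(A,l,PETSC_NULL);CHKERRQ(ierr);  row scaling only

   Note how the routine above overlaps the scatter of r with the left scaling of the
   off-diagonal block.
*/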
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1687 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1688 {
1689   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1690   PetscErrorCode ierr;
1691 
1692   PetscFunctionBegin;
1693   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1694   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1699 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1700 {
1701   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1702   PetscErrorCode ierr;
1703 
1704   PetscFunctionBegin;
1705   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatEqual_MPIAIJ"
1711 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1712 {
1713   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1714   Mat            a,b,c,d;
1715   PetscTruth     flg;
1716   PetscErrorCode ierr;
1717 
1718   PetscFunctionBegin;
1719   a = matA->A; b = matA->B;
1720   c = matB->A; d = matB->B;
1721 
1722   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1723   if (flg) {
1724     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1725   }
1726   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatCopy_MPIAIJ"
1732 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1733 {
1734   PetscErrorCode ierr;
1735   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1736   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1737 
1738   PetscFunctionBegin;
1739   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1740   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1741     /* because of the column compression in the off-processor part of the matrix a->B,
1742        the number of columns in a->B and b->B may be different, hence we cannot call
1743        the MatCopy() directly on the two parts. If need be, we can provide a more
1744        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1745        then copying the submatrices */
1746     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1747   } else {
1748     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1749     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1750   }
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1756 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1757 {
1758   PetscErrorCode ierr;
1759 
1760   PetscFunctionBegin;
1761   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 #include "petscblaslapack.h"
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatAXPY_MPIAIJ"
1768 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1769 {
1770   PetscErrorCode ierr;
1771   PetscInt       i;
1772   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1773   PetscBLASInt   bnz,one=1;
1774   Mat_SeqAIJ     *x,*y;
1775 
1776   PetscFunctionBegin;
1777   if (str == SAME_NONZERO_PATTERN) {
1778     PetscScalar alpha = a;
1779     x = (Mat_SeqAIJ *)xx->A->data;
1780     y = (Mat_SeqAIJ *)yy->A->data;
1781     bnz = PetscBLASIntCast(x->nz);
1782     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1783     x = (Mat_SeqAIJ *)xx->B->data;
1784     y = (Mat_SeqAIJ *)yy->B->data;
1785     bnz = PetscBLASIntCast(x->nz);
1786     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1787   } else if (str == SUBSET_NONZERO_PATTERN) {
1788     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1789 
1790     x = (Mat_SeqAIJ *)xx->B->data;
1791     y = (Mat_SeqAIJ *)yy->B->data;
1792     if (y->xtoy && y->XtoY != xx->B) {
1793       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1794       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1795     }
1796     if (!y->xtoy) { /* get xtoy */
1797       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1798       y->XtoY = xx->B;
1799       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1800     }
1801     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1802   } else {
1803     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1804   }
1805   PetscFunctionReturn(0);
1806 }
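
/*
   A usage sketch (hypothetical caller): Y <- a*X + Y,

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   With SAME_NONZERO_PATTERN the update reduces to two BLAS axpy calls on the value
   arrays; SUBSET_NONZERO_PATTERN builds (and caches) an x-to-y index map for the
   off-diagonal block; any other structure falls back to MatAXPY_Basic().
*/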
1807 
1808 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatConjugate_MPIAIJ"
1812 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1813 {
1814 #if defined(PETSC_USE_COMPLEX)
1815   PetscErrorCode ierr;
1816   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1817 
1818   PetscFunctionBegin;
1819   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1820   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1821 #else
1822   PetscFunctionBegin;
1823 #endif
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatRealPart_MPIAIJ"
1829 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1830 {
1831   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1832   PetscErrorCode ierr;
1833 
1834   PetscFunctionBegin;
1835   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1836   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1842 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1843 {
1844   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1845   PetscErrorCode ierr;
1846 
1847   PetscFunctionBegin;
1848   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1849   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 #ifdef PETSC_HAVE_PBGL
1854 
1855 #include <boost/parallel/mpi/bsp_process_group.hpp>
1856 #include <boost/graph/distributed/ilu_default_graph.hpp>
1857 #include <boost/graph/distributed/ilu_0_block.hpp>
1858 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1859 #include <boost/graph/distributed/petsc/interface.hpp>
1860 #include <boost/multi_array.hpp>
1861 #include <boost/parallel/distributed_property_map.hpp>
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1865 /*
1866   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1867 */
1868 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1869 {
1870   namespace petsc = boost::distributed::petsc;
1871 
1872   namespace graph_dist = boost::graph::distributed;
1873   using boost::graph::distributed::ilu_default::process_group_type;
1874   using boost::graph::ilu_permuted;
1875 
1876   PetscTruth      row_identity, col_identity;
1877   PetscContainer  c;
1878   PetscInt        m, n, M, N;
1879   PetscErrorCode  ierr;
1880 
1881   PetscFunctionBegin;
1882   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1883   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1884   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1885   if (!row_identity || !col_identity) {
1886     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1887   }
1888 
1889   process_group_type pg;
1890   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1891   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1892   lgraph_type&   level_graph = *lgraph_p;
1893   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1894 
1895   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1896   ilu_permuted(level_graph);
1897 
1898   /* put together the new matrix */
1899   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1900   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1901   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1902   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1903   ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1904   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1905   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1906   (*fact)->factor = FACTOR_LU;
1907 
1908   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1909   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1910   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1916 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1917 {
1918   PetscFunctionBegin;
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatSolve_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
1927 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1928 {
1929   namespace graph_dist = boost::graph::distributed;
1930 
1931   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1932   lgraph_type*   lgraph_p;
1933   PetscContainer c;
1934   PetscErrorCode ierr;
1935 
1936   PetscFunctionBegin;
1937   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1938   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1939   ierr = VecCopy(b, x); CHKERRQ(ierr);
1940 
1941   PetscScalar* array_x;
1942   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1943   PetscInt sx;
1944   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1945 
1946   PetscScalar* array_b;
1947   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1948   PetscInt sb;
1949   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1950 
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1955   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1956                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1957 
1958   typedef boost::iterator_property_map<array_ref_type::iterator,
1959                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1960   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1961                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1962 
1963   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1964   ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr);
1965   PetscFunctionReturn(0);
1966 }
1967 #endif
1968 
1969 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1970   PetscInt       nzlocal,nsends,nrecvs;
1971   PetscMPIInt    *send_rank;
1972   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1973   PetscScalar    *sbuf_a,**rbuf_a;
1974   PetscErrorCode (*MatDestroy)(Mat);
1975 } Mat_Redundant;
1976 
1977 #undef __FUNCT__
1978 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1979 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1980 {
1981   PetscErrorCode       ierr;
1982   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1983   PetscInt             i;
1984 
1985   PetscFunctionBegin;
1986   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1987   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1988   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1989   for (i=0; i<redund->nrecvs; i++){
1990     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1991     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1992   }
1993   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1994   ierr = PetscFree(redund);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatDestroy_MatRedundant"
2000 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2001 {
2002   PetscErrorCode  ierr;
2003   PetscContainer  container;
2004   Mat_Redundant   *redund=PETSC_NULL;
2005 
2006   PetscFunctionBegin;
2007   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2008   if (container) {
2009     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2010   } else {
2011     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2012   }
2013   A->ops->destroy = redund->MatDestroy;
2014   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2015   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2016   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2022 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2023 {
2024   PetscMPIInt    rank,size;
2025   MPI_Comm       comm=((PetscObject)mat)->comm;
2026   PetscErrorCode ierr;
2027   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2028   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2029   PetscInt       *rowrange=mat->rmap.range;
2030   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2031   Mat            A=aij->A,B=aij->B,C=*matredundant;
2032   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2033   PetscScalar    *sbuf_a;
2034   PetscInt       nzlocal=a->nz+b->nz;
2035   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2036   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
2037   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2038   MatScalar      *aworkA,*aworkB;
2039   PetscScalar    *vals;
2040   PetscMPIInt    tag1,tag2,tag3,imdex;
2041   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2042                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2043   MPI_Status     recv_status,*send_status;
2044   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2045   PetscInt       **rbuf_j=PETSC_NULL;
2046   PetscScalar    **rbuf_a=PETSC_NULL;
2047   Mat_Redundant  *redund=PETSC_NULL;
2048   PetscContainer container;
2049 
2050   PetscFunctionBegin;
2051   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2052   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2053 
2054   if (reuse == MAT_REUSE_MATRIX) {
2055     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2056     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2057     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2058     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2059     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2060     if (container) {
2061       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2062     } else {
2063       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2064     }
2065     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2066 
2067     nsends    = redund->nsends;
2068     nrecvs    = redund->nrecvs;
2069     send_rank = redund->send_rank; recv_rank = send_rank + size;
2070     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2071     sbuf_j    = redund->sbuf_j;
2072     sbuf_a    = redund->sbuf_a;
2073     rbuf_j    = redund->rbuf_j;
2074     rbuf_a    = redund->rbuf_a;
2075   }
2076 
2077   if (reuse == MAT_INITIAL_MATRIX){
2078     PetscMPIInt  subrank,subsize;
2079     PetscInt     nleftover,np_subcomm;
2080     /* get the destination processors' id send_rank, nsends and nrecvs */
2081     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2082     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2083     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2084     recv_rank = send_rank + size;
2085     np_subcomm = size/nsubcomm;
2086     nleftover  = size - nsubcomm*np_subcomm;
2087     nsends = 0; nrecvs = 0;
2088     for (i=0; i<size; i++){ /* i=rank*/
2089       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2090         send_rank[nsends] = i; nsends++;
2091         recv_rank[nrecvs++] = i;
2092       }
2093     }
2094     if (rank >= size - nleftover){/* this proc is a leftover processor */
2095       i = size-nleftover-1;
2096       j = 0;
2097       while (j < nsubcomm - nleftover){
2098         send_rank[nsends++] = i;
2099         i--; j++;
2100       }
2101     }
2102 
2103     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2104       for (i=0; i<nleftover; i++){
2105         recv_rank[nrecvs++] = size-nleftover+i;
2106       }
2107     }
2108 
2109     /* allocate sbuf_j, sbuf_a */
2110     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2111     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2112     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2113   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2114 
2115   /* copy mat's local entries into the buffers */
2116   if (reuse == MAT_INITIAL_MATRIX){
2117     rownz_max = 0;
2118     rptr = sbuf_j;
2119     cols = sbuf_j + rend-rstart + 1;
2120     vals = sbuf_a;
2121     rptr[0] = 0;
2122     for (i=0; i<rend-rstart; i++){
2123       row = i + rstart;
2124       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2125       ncols  = nzA + nzB;
2126       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2127       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2128       /* load the column indices for this row into cols */
2129       lwrite = 0;
2130       for (l=0; l<nzB; l++) {
2131         if ((ctmp = bmap[cworkB[l]]) < cstart){
2132           vals[lwrite]   = aworkB[l];
2133           cols[lwrite++] = ctmp;
2134         }
2135       }
2136       for (l=0; l<nzA; l++){
2137         vals[lwrite]   = aworkA[l];
2138         cols[lwrite++] = cstart + cworkA[l];
2139       }
2140       for (l=0; l<nzB; l++) {
2141         if ((ctmp = bmap[cworkB[l]]) >= cend){
2142           vals[lwrite]   = aworkB[l];
2143           cols[lwrite++] = ctmp;
2144         }
2145       }
2146       vals += ncols;
2147       cols += ncols;
2148       rptr[i+1] = rptr[i] + ncols;
2149       if (rownz_max < ncols) rownz_max = ncols;
2150     }
2151     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2152   } else { /* only copy matrix values into sbuf_a */
2153     rptr = sbuf_j;
2154     vals = sbuf_a;
2155     rptr[0] = 0;
2156     for (i=0; i<rend-rstart; i++){
2157       row = i + rstart;
2158       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2159       ncols  = nzA + nzB;
2160       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2161       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2162       lwrite = 0;
2163       for (l=0; l<nzB; l++) {
2164         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2165       }
2166       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2167       for (l=0; l<nzB; l++) {
2168         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2169       }
2170       vals += ncols;
2171       rptr[i+1] = rptr[i] + ncols;
2172     }
2173   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2174 
2175   /* send nzlocal to others, and recv other's nzlocal */
2176   /*--------------------------------------------------*/
2177   if (reuse == MAT_INITIAL_MATRIX){
2178     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2179     s_waits2 = s_waits3 + nsends;
2180     s_waits1 = s_waits2 + nsends;
2181     r_waits1 = s_waits1 + nsends;
2182     r_waits2 = r_waits1 + nrecvs;
2183     r_waits3 = r_waits2 + nrecvs;
2184   } else {
2185     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2186     r_waits3 = s_waits3 + nsends;
2187   }
2188 
2189   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2190   if (reuse == MAT_INITIAL_MATRIX){
2191     /* get new tags to keep the communication clean */
2192     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2193     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2194     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2195     rbuf_nz = sbuf_nz + nsends;
2196 
2197     /* post receives of other's nzlocal */
2198     for (i=0; i<nrecvs; i++){
2199       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2200     }
2201     /* send nzlocal to others */
2202     for (i=0; i<nsends; i++){
2203       sbuf_nz[i] = nzlocal;
2204       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2205     }
2206     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2207     count = nrecvs;
2208     while (count) {
2209       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2210       recv_rank[imdex] = recv_status.MPI_SOURCE;
2211       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2212       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2213 
2214       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2215       rbuf_nz[imdex] += i + 2;
2216       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2217       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2218       count--;
2219     }
2220     /* wait on sends of nzlocal */
2221     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2222     /* send mat->i,j to others, and recv from other's */
2223     /*------------------------------------------------*/
2224     for (i=0; i<nsends; i++){
2225       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2226       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2227     }
2228     /* wait on receives of mat->i,j */
2229     /*------------------------------*/
2230     count = nrecvs;
2231     while (count) {
2232       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2233       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2234       count--;
2235     }
2236     /* wait on sends of mat->i,j */
2237     /*---------------------------*/
2238     if (nsends) {
2239       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2240     }
2241   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2242 
2243   /* post receives, send and receive mat->a */
2244   /*----------------------------------------*/
2245   for (imdex=0; imdex<nrecvs; imdex++) {
2246     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2247   }
2248   for (i=0; i<nsends; i++){
2249     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2250   }
2251   count = nrecvs;
2252   while (count) {
2253     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2254     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2255     count--;
2256   }
2257   if (nsends) {
2258     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2259   }
2260 
2261   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2262 
2263   /* create redundant matrix */
2264   /*-------------------------*/
2265   if (reuse == MAT_INITIAL_MATRIX){
2266     /* compute rownz_max for preallocation */
2267     for (imdex=0; imdex<nrecvs; imdex++){
2268       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2269       rptr = rbuf_j[imdex];
2270       for (i=0; i<j; i++){
2271         ncols = rptr[i+1] - rptr[i];
2272         if (rownz_max < ncols) rownz_max = ncols;
2273       }
2274     }
2275 
2276     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2277     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2278     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2279     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2280     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2281   } else {
2282     C = *matredundant;
2283   }
2284 
2285   /* insert local matrix entries */
2286   rptr = sbuf_j;
2287   cols = sbuf_j + rend-rstart + 1;
2288   vals = sbuf_a;
2289   for (i=0; i<rend-rstart; i++){
2290     row   = i + rstart;
2291     ncols = rptr[i+1] - rptr[i];
2292     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2293     vals += ncols;
2294     cols += ncols;
2295   }
2296   /* insert received matrix entries */
2297   for (imdex=0; imdex<nrecvs; imdex++){
2298     rstart = rowrange[recv_rank[imdex]];
2299     rend   = rowrange[recv_rank[imdex]+1];
2300     rptr = rbuf_j[imdex];
2301     cols = rbuf_j[imdex] + rend-rstart + 1;
2302     vals = rbuf_a[imdex];
2303     for (i=0; i<rend-rstart; i++){
2304       row   = i + rstart;
2305       ncols = rptr[i+1] - rptr[i];
2306       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2307       vals += ncols;
2308       cols += ncols;
2309     }
2310   }
2311   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2312   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2313   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2314   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2315   if (reuse == MAT_INITIAL_MATRIX){
2316     PetscContainer container;
2317     *matredundant = C;
2318     /* create a supporting struct and attach it to C for reuse */
2319     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2320     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2321     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2322     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2323     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2324 
2325     redund->nzlocal = nzlocal;
2326     redund->nsends  = nsends;
2327     redund->nrecvs  = nrecvs;
2328     redund->send_rank = send_rank;
2329     redund->sbuf_nz = sbuf_nz;
2330     redund->sbuf_j  = sbuf_j;
2331     redund->sbuf_a  = sbuf_a;
2332     redund->rbuf_j  = rbuf_j;
2333     redund->rbuf_a  = rbuf_a;
2334 
2335     redund->MatDestroy = C->ops->destroy;
2336     C->ops->destroy    = MatDestroy_MatRedundant;
2337   }
2338   PetscFunctionReturn(0);
2339 }
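
/*
   A usage sketch (hypothetical caller; creation of subcomm and mlocal_sub elided): build
   nsubcomm redundant copies of mat, then refresh only the numerical values later,

     Mat Cred;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
     change the values of mat, keeping its nonzero pattern, then
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);

   The Mat_Redundant container attached to Cred keeps the send/receive buffers, so the
   MAT_REUSE_MATRIX path ships only the numerical values.
*/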
2340 
2341 #undef __FUNCT__
2342 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2343 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2344 {
2345   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2346   PetscErrorCode ierr;
2347   PetscInt       i,*idxb = 0;
2348   PetscScalar    *va,*vb;
2349   Vec            vtmp;
2350 
2351   PetscFunctionBegin;
2352   if (idx) {ierr = PetscMemzero(idx,A->rmap.n*sizeof(PetscInt));CHKERRQ(ierr);} /* idx may be null; one entry per local row */
2353   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2354   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2355   if (idx) {
2356     for (i=0; i<A->rmap.n; i++) {
2357       if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;
2358     }
2359   }
2360 
2361   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
2362   if (idx) {
2363     ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2364   }
2365   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2366   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2367 
2368   for (i=0; i<A->rmap.n; i++){
2369     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2370       va[i] = vb[i];
2371       if (idx) idx[i] = a->garray[idxb[i]];
2372     }
2373   }
2374 
2375   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2376   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2377   if (idxb) {
2378     ierr = PetscFree(idxb);CHKERRQ(ierr);
2379   }
2380   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2386 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2387 {
2388   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2389   PetscInt       n      = A->rmap.n;
2390   PetscInt       cstart = A->cmap.rstart;
2391   PetscInt      *cmap   = mat->garray;
2392   PetscInt      *diagIdx, *offdiagIdx;
2393   Vec            diagV, offdiagV;
2394   PetscScalar   *a, *diagA, *offdiagA;
2395   PetscInt       r;
2396   PetscErrorCode ierr;
2397 
2398   PetscFunctionBegin;
2399   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2400   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2401   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2402   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2403   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2404   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2405   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2406   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2407   for(r = 0; r < n; ++r) {
2408     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2409       a[r]   = diagA[r];
2410       idx[r] = cstart + diagIdx[r];
2411     } else {
2412       a[r]   = offdiagA[r];
2413       idx[r] = cmap[offdiagIdx[r]];
2414     }
2415   }
2416   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2417   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2418   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2419   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2420   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2421   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2427 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2428 {
2429   PetscErrorCode ierr;
2430 
2431   PetscFunctionBegin;
2432   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 /* -------------------------------------------------------------------*/
2437 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2438        MatGetRow_MPIAIJ,
2439        MatRestoreRow_MPIAIJ,
2440        MatMult_MPIAIJ,
2441 /* 4*/ MatMultAdd_MPIAIJ,
2442        MatMultTranspose_MPIAIJ,
2443        MatMultTransposeAdd_MPIAIJ,
2444 #ifdef PETSC_HAVE_PBGL
2445        MatSolve_MPIAIJ,
2446 #else
2447        0,
2448 #endif
2449        0,
2450        0,
2451 /*10*/ 0,
2452        0,
2453        0,
2454        MatRelax_MPIAIJ,
2455        MatTranspose_MPIAIJ,
2456 /*15*/ MatGetInfo_MPIAIJ,
2457        MatEqual_MPIAIJ,
2458        MatGetDiagonal_MPIAIJ,
2459        MatDiagonalScale_MPIAIJ,
2460        MatNorm_MPIAIJ,
2461 /*20*/ MatAssemblyBegin_MPIAIJ,
2462        MatAssemblyEnd_MPIAIJ,
2463        0,
2464        MatSetOption_MPIAIJ,
2465        MatZeroEntries_MPIAIJ,
2466 /*25*/ MatZeroRows_MPIAIJ,
2467        0,
2468 #ifdef PETSC_HAVE_PBGL
2469        MatLUFactorNumeric_MPIAIJ,
2470 #else
2471        0,
2472 #endif
2473        0,
2474        0,
2475 /*30*/ MatSetUpPreallocation_MPIAIJ,
2476 #ifdef PETSC_HAVE_PBGL
2477        MatILUFactorSymbolic_MPIAIJ,
2478 #else
2479        0,
2480 #endif
2481        0,
2482        0,
2483        0,
2484 /*35*/ MatDuplicate_MPIAIJ,
2485        0,
2486        0,
2487        0,
2488        0,
2489 /*40*/ MatAXPY_MPIAIJ,
2490        MatGetSubMatrices_MPIAIJ,
2491        MatIncreaseOverlap_MPIAIJ,
2492        MatGetValues_MPIAIJ,
2493        MatCopy_MPIAIJ,
2494 /*45*/ 0,
2495        MatScale_MPIAIJ,
2496        0,
2497        0,
2498        0,
2499 /*50*/ MatSetBlockSize_MPIAIJ,
2500        0,
2501        0,
2502        0,
2503        0,
2504 /*55*/ MatFDColoringCreate_MPIAIJ,
2505        0,
2506        MatSetUnfactored_MPIAIJ,
2507        MatPermute_MPIAIJ,
2508        0,
2509 /*60*/ MatGetSubMatrix_MPIAIJ,
2510        MatDestroy_MPIAIJ,
2511        MatView_MPIAIJ,
2512        0,
2513        0,
2514 /*65*/ 0,
2515        0,
2516        0,
2517        0,
2518        0,
2519 /*70*/ MatGetRowMaxAbs_MPIAIJ,
2520        0,
2521        MatSetColoring_MPIAIJ,
2522 #if defined(PETSC_HAVE_ADIC)
2523        MatSetValuesAdic_MPIAIJ,
2524 #else
2525        0,
2526 #endif
2527        MatSetValuesAdifor_MPIAIJ,
2528 /*75*/ 0,
2529        0,
2530        0,
2531        0,
2532        0,
2533 /*80*/ 0,
2534        0,
2535        0,
2536        0,
2537 /*84*/ MatLoad_MPIAIJ,
2538        0,
2539        0,
2540        0,
2541        0,
2542        0,
2543 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2544        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2545        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2546        MatPtAP_Basic,
2547        MatPtAPSymbolic_MPIAIJ,
2548 /*95*/ MatPtAPNumeric_MPIAIJ,
2549        0,
2550        0,
2551        0,
2552        0,
2553 /*100*/0,
2554        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2555        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2556        MatConjugate_MPIAIJ,
2557        0,
2558 /*105*/MatSetValuesRow_MPIAIJ,
2559        MatRealPart_MPIAIJ,
2560        MatImaginaryPart_MPIAIJ,
2561        0,
2562        0,
2563 /*110*/0,
2564        MatGetRedundantMatrix_MPIAIJ,
2565        MatGetRowMin_MPIAIJ,
2566        0,
2567        0,
2568 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2569 
2570 /* ----------------------------------------------------------------------------------------*/
2571 
2572 EXTERN_C_BEGIN
2573 #undef __FUNCT__
2574 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2575 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2576 {
2577   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2578   PetscErrorCode ierr;
2579 
2580   PetscFunctionBegin;
2581   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2582   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2583   PetscFunctionReturn(0);
2584 }
2585 EXTERN_C_END
2586 
2587 EXTERN_C_BEGIN
2588 #undef __FUNCT__
2589 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2590 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2591 {
2592   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2593   PetscErrorCode ierr;
2594 
2595   PetscFunctionBegin;
2596   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2597   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2598   PetscFunctionReturn(0);
2599 }
2600 EXTERN_C_END
2601 
2602 #include "petscpc.h"
2603 EXTERN_C_BEGIN
2604 #undef __FUNCT__
2605 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2606 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2607 {
2608   Mat_MPIAIJ     *b;
2609   PetscErrorCode ierr;
2610   PetscInt       i;
2611 
2612   PetscFunctionBegin;
2613   B->preallocated = PETSC_TRUE;
2614   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2615   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2616   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2617   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2618 
2619   B->rmap.bs = B->cmap.bs = 1;
2620   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2621   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2622   if (d_nnz) {
2623     for (i=0; i<B->rmap.n; i++) {
2624       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2625     }
2626   }
2627   if (o_nnz) {
2628     for (i=0; i<B->rmap.n; i++) {
2629       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2630     }
2631   }
2632   b = (Mat_MPIAIJ*)B->data;
2633 
2634   /* Explicitly create 2 MATSEQAIJ matrices. */
2635   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2636   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2637   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2638   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2639   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2640   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2641   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2642   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2643 
2644   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2645   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2646 
2647   PetscFunctionReturn(0);
2648 }
2649 EXTERN_C_END
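
/*
   A usage sketch (hypothetical caller): d_nnz[i]/o_nnz[i] give, for local row i, the number
   of nonzeros inside and outside the diagonal block,

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   If the arrays are PETSC_NULL the scalars d_nz/o_nz are used for every row (defaulting
   to 5 and 2 above when PETSC_DEFAULT is passed).
*/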
2650 
2651 #undef __FUNCT__
2652 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2653 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2654 {
2655   Mat            mat;
2656   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2657   PetscErrorCode ierr;
2658 
2659   PetscFunctionBegin;
2660   *newmat       = 0;
2661   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2662   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2663   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2664   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2665   a    = (Mat_MPIAIJ*)mat->data;
2666 
2667   mat->factor       = matin->factor;
2668   mat->rmap.bs      = matin->rmap.bs;
2669   mat->assembled    = PETSC_TRUE;
2670   mat->insertmode   = NOT_SET_VALUES;
2671   mat->preallocated = PETSC_TRUE;
2672 
2673   a->size           = oldmat->size;
2674   a->rank           = oldmat->rank;
2675   a->donotstash     = oldmat->donotstash;
2676   a->roworiented    = oldmat->roworiented;
2677   a->rowindices     = 0;
2678   a->rowvalues      = 0;
2679   a->getrowactive   = PETSC_FALSE;
2680 
2681   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2682   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2683 
2684   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2685   if (oldmat->colmap) {
2686 #if defined (PETSC_USE_CTABLE)
2687     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2688 #else
2689     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2690     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2691     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2692 #endif
2693   } else a->colmap = 0;
2694   if (oldmat->garray) {
2695     PetscInt len;
2696     len  = oldmat->B->cmap.n;
2697     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2698     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2699     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2700   } else a->garray = 0;
2701 
2702   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2703   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2704   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2705   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2706   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2707   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2708   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2709   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2710   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2711   *newmat = mat;
2712   PetscFunctionReturn(0);
2713 }
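
/*
   A usage sketch (hypothetical caller):

     Mat Adup;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&Adup);CHKERRQ(ierr);  or MAT_DO_NOT_COPY_VALUES for structure only
     use Adup, then
     ierr = MatDestroy(Adup);CHKERRQ(ierr);
*/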
2714 
2715 #include "petscsys.h"
2716 
2717 #undef __FUNCT__
2718 #define __FUNCT__ "MatLoad_MPIAIJ"
2719 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2720 {
2721   Mat            A;
2722   PetscScalar    *vals,*svals;
2723   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2724   MPI_Status     status;
2725   PetscErrorCode ierr;
2726   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2727   PetscInt       i,nz,j,rstart,rend,mmax;
2728   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2729   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2730   PetscInt       cend,cstart,n,*rowners;
2731   int            fd;
2732 
2733   PetscFunctionBegin;
2734   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2735   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2736   if (!rank) {
2737     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2738     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2739     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2740   }
2741 
2742   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2743   M = header[1]; N = header[2];
2744   /* determine ownership of all rows */
2745   m    = M/size + ((M % size) > rank);
2746   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2747   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2748 
2749   /* First process needs enough room for process with most rows */
2750   if (!rank) {
2751     mmax       = rowners[1];
2752     for (i=2; i<=size; i++) {  /* rowners[1..size] still hold the per-process row counts here */
2753       mmax = PetscMax(mmax,rowners[i]);
2754     }
2755   } else mmax = m;
2756 
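  /* turn the gathered per-process row counts into cumulative row-ownership offsets */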
2757   rowners[0] = 0;
2758   for (i=2; i<=size; i++) {
2759     rowners[i] += rowners[i-1];
2760   }
2761   rstart = rowners[rank];
2762   rend   = rowners[rank+1];
2763 
2764   /* distribute row lengths to all processors */
2765   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2766   if (!rank) {
2767     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2768     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2769     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2770     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2771     for (j=0; j<m; j++) {
2772       procsnz[0] += ourlens[j];
2773     }
2774     for (i=1; i<size; i++) {
2775       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2776       /* calculate the number of nonzeros on each processor */
2777       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2778         procsnz[i] += rowlengths[j];
2779       }
2780       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2781     }
2782     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2783   } else {
2784     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2785   }
2786 
2787   if (!rank) {
2788     /* determine max buffer needed and allocate it */
2789     maxnz = 0;
2790     for (i=0; i<size; i++) {
2791       maxnz = PetscMax(maxnz,procsnz[i]);
2792     }
2793     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2794 
2795     /* read in my part of the matrix column indices  */
2796     nz   = procsnz[0];
2797     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2798     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2799 
2800     /* read in everyone else's part and ship it off */
2801     for (i=1; i<size; i++) {
2802       nz   = procsnz[i];
2803       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2804       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2805     }
2806     ierr = PetscFree(cols);CHKERRQ(ierr);
2807   } else {
2808     /* determine buffer space needed for message */
2809     nz = 0;
2810     for (i=0; i<m; i++) {
2811       nz += ourlens[i];
2812     }
2813     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2814 
2815     /* receive message of column indices */
2816     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2817     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2818     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2819   }
2820 
2821   /* determine column ownership if matrix is not square */
2822   if (N != M) {
2823     n      = N/size + ((N % size) > rank);
2824     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2825     cstart = cend - n;
2826   } else {
2827     cstart = rstart;
2828     cend   = rend;
2829     n      = cend - cstart;
2830   }
2831 
2832   /* loop over local rows, determining number of off diagonal entries */
2833   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2834   jj = 0;
2835   for (i=0; i<m; i++) {
2836     for (j=0; j<ourlens[i]; j++) {
2837       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2838       jj++;
2839     }
2840   }
2841 
2842   /* create our matrix */
2843   for (i=0; i<m; i++) {
2844     ourlens[i] -= offlens[i];
2845   }
2846   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2847   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2848   ierr = MatSetType(A,type);CHKERRQ(ierr);
2849   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2850 
2851   for (i=0; i<m; i++) {
2852     ourlens[i] += offlens[i];
2853   }
2854 
2855   if (!rank) {
2856     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2857 
2858     /* read in my part of the matrix numerical values  */
2859     nz   = procsnz[0];
2860     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2861 
2862     /* insert into matrix */
2863     jj      = rstart;
2864     smycols = mycols;
2865     svals   = vals;
2866     for (i=0; i<m; i++) {
2867       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2868       smycols += ourlens[i];
2869       svals   += ourlens[i];
2870       jj++;
2871     }
2872 
2873     /* read in other processors and ship out */
2874     for (i=1; i<size; i++) {
2875       nz   = procsnz[i];
2876       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2877       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2878     }
2879     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2880   } else {
2881     /* receive numeric values */
2882     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2883 
2884     /* receive message of values */
2885     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2886     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2887     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2888 
2889     /* insert into matrix */
2890     jj      = rstart;
2891     smycols = mycols;
2892     svals   = vals;
2893     for (i=0; i<m; i++) {
2894       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2895       smycols += ourlens[i];
2896       svals   += ourlens[i];
2897       jj++;
2898     }
2899   }
2900   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2901   ierr = PetscFree(vals);CHKERRQ(ierr);
2902   ierr = PetscFree(mycols);CHKERRQ(ierr);
2903   ierr = PetscFree(rowners);CHKERRQ(ierr);
2904 
2905   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2906   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2907   *newmat = A;
2908   PetscFunctionReturn(0);
2909 }
2910 
2911 #undef __FUNCT__
2912 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2913 /*
2914     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2915   on each process, then the final result formed by concatenating the local matrices.
2916   Writing it directly would look much like MatGetSubMatrices_MPIAIJ()
2917 */
2918 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2919 {
2920   PetscErrorCode ierr;
2921   PetscMPIInt    rank,size;
2922   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2923   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2924   Mat            *local,M,Mreuse;
2925   MatScalar      *vwork,*aa;
2926   MPI_Comm       comm = ((PetscObject)mat)->comm;
2927   Mat_SeqAIJ     *aij;
2928 
2929 
2930   PetscFunctionBegin;
2931   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2932   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2933 
2934   if (call ==  MAT_REUSE_MATRIX) {
2935     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2936     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2937     local = &Mreuse;
2938     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2939   } else {
2940     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2941     Mreuse = *local;
2942     ierr   = PetscFree(local);CHKERRQ(ierr);
2943   }
2944 
2945   /*
2946       m - number of local rows
2947       n - number of columns (same on all processors)
2948       rstart - first row in new global matrix generated
2949   */
2950   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2951   if (call == MAT_INITIAL_MATRIX) {
2952     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2953     ii  = aij->i;
2954     jj  = aij->j;
2955 
2956     /*
2957         Determine the number of non-zeros in the diagonal and off-diagonal
2958         portions of the matrix in order to do correct preallocation
2959     */
2960 
2961     /* first get start and end of "diagonal" columns */
2962     if (csize == PETSC_DECIDE) {
2963       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2964       if (mglobal == n) { /* square matrix */
2965 	nlocal = m;
2966       } else {
2967         nlocal = n/size + ((n % size) > rank);
2968       }
2969     } else {
2970       nlocal = csize;
2971     }
2972     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2973     rstart = rend - nlocal;
2974     if (rank == size - 1 && rend != n) {
2975       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2976     }
2977 
2978     /* next, compute all the lengths */
2979     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2980     olens = dlens + m;
2981     for (i=0; i<m; i++) {
2982       jend = ii[i+1] - ii[i];
2983       olen = 0;
2984       dlen = 0;
2985       for (j=0; j<jend; j++) {
2986         if (*jj < rstart || *jj >= rend) olen++;
2987         else dlen++;
2988         jj++;
2989       }
2990       olens[i] = olen;
2991       dlens[i] = dlen;
2992     }
2993     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2994     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2995     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2996     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2997     ierr = PetscFree(dlens);CHKERRQ(ierr);
2998   } else {
2999     PetscInt ml,nl;
3000 
3001     M = *newmat;
3002     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3003     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3004     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3005     /*
3006          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3007        rather than the slower MatSetValues().
3008     */
3009     M->was_assembled = PETSC_TRUE;
3010     M->assembled     = PETSC_FALSE;
3011   }
3012   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3013   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3014   ii  = aij->i;
3015   jj  = aij->j;
3016   aa  = aij->a;
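  /* walk the CSR arrays of the local submatrix and insert each row into the global matrix */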
3017   for (i=0; i<m; i++) {
3018     row   = rstart + i;
3019     nz    = ii[i+1] - ii[i];
3020     cwork = jj;     jj += nz;
3021     vwork = aa;     aa += nz;
3022     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3023   }
3024 
3025   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3026   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3027   *newmat = M;
3028 
3029   /* save submatrix used in processor for next request */
3030   if (call ==  MAT_INITIAL_MATRIX) {
3031     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3032     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3033   }
3034 
3035   PetscFunctionReturn(0);
3036 }
3037 
3038 EXTERN_C_BEGIN
3039 #undef __FUNCT__
3040 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3041 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3042 {
3043   PetscInt       m,cstart, cend,j,nnz,i,d;
3044   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3045   const PetscInt *JJ;
3046   PetscScalar    *values;
3047   PetscErrorCode ierr;
3048 
3049   PetscFunctionBegin;
3050   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3051 
3052   B->rmap.bs = B->cmap.bs = 1;
3053   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
3054   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
3055   m      = B->rmap.n;
3056   cstart = B->cmap.rstart;
3057   cend   = B->cmap.rend;
3058   rstart = B->rmap.rstart;
3059 
3060   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3061   o_nnz = d_nnz + m;
3062 
3063 #if defined(PETSC_USE_DEBUG)
3064   for (i=0; i<m; i++) {
3065     nnz     = Ii[i+1]- Ii[i];
3066     JJ      = J + Ii[i];
3067     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3068     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3069     if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
3070     for (j=1; j<nnz; j++) {
3071       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has an unsorted column index at position %D",i,j);
3072     }
3073   }
3074 #endif
3075 
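  /* count, for each local row, the nonzeros falling inside the diagonal block columns [cstart,cend) and those falling outside it */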
3076   for (i=0; i<m; i++) {
3077     nnz     = Ii[i+1]- Ii[i];
3078     JJ      = J + Ii[i];
3079     nnz_max = PetscMax(nnz_max,nnz);
3080     for (j=0; j<nnz; j++) {
3081       if (*JJ >= cstart) break;
3082       JJ++;
3083     }
3084     d = 0;
3085     for (; j<nnz; j++) {
3086       if (*JJ++ >= cend) break;
3087       d++;
3088     }
3089     d_nnz[i] = d;
3090     o_nnz[i] = nnz - d;
3091   }
3092   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3093   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3094 
3095   if (v) values = (PetscScalar*)v;
3096   else {
3097     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3098     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3099   }
3100 
3101   for (i=0; i<m; i++) {
3102     ii   = i + rstart;
3103     nnz  = Ii[i+1]- Ii[i];
3104     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3105   }
3106   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3107   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3108 
3109   if (!v) {
3110     ierr = PetscFree(values);CHKERRQ(ierr);
3111   }
3112   PetscFunctionReturn(0);
3113 }
3114 EXTERN_C_END
3115 
3116 #undef __FUNCT__
3117 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3118 /*@
3119    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3120    (the default parallel PETSc format).
3121 
3122    Collective on MPI_Comm
3123 
3124    Input Parameters:
3125 +  B - the matrix
3126 .  i - the indices into j for the start of each local row (starts with zero)
3127 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3128 -  v - optional values in the matrix
3129 
3130    Level: developer
3131 
3132    Notes:
3133        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3134      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3135      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3136 
3137        The i and j indices are 0 based, and the i entries are offsets into the local j array.
3138 
3139        The format used for the sparse matrix input is equivalent to a
3140     row-major ordering, i.e. for the following matrix, the input data expected is
3141     as shown:
3142 
3143         1 0 0
3144         2 0 3     P0
3145        -------
3146         4 5 6     P1
3147 
3148      Process0 [P0]: rows_owned=[0,1]
3149         i =  {0,1,3}  [size = nrow+1  = 2+1]
3150         j =  {0,0,2}  [size = nz = 3]
3151         v =  {1,2,3}  [size = nz = 3]
3152 
3153      Process1 [P1]: rows_owned=[2]
3154         i =  {0,3}    [size = nrow+1  = 1+1]
3155         j =  {0,1,2}  [size = nz = 3]
3156         v =  {4,5,6}  [size = nz = 3]
3157 
3158       The column indices for each row MUST be sorted.
3159 
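      A minimal calling sketch (a hedged illustration, not from the PETSc test
    suite; it mirrors process P0 of the layout above with hypothetical sizes):

.vb
     PetscInt    i[] = {0,1,3};          /* row offsets for the 2 local rows */
     PetscInt    j[] = {0,0,2};          /* sorted global column indices     */
     PetscScalar v[] = {1.0,2.0,3.0};    /* the matching values              */
     MatCreate(comm,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);  /* 2 local rows of a 3x3 matrix     */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve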
3160 .keywords: matrix, aij, compressed row, sparse, parallel
3161 
3162 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3163           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3164 @*/
3165 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3166 {
3167   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3168 
3169   PetscFunctionBegin;
3170   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3171   if (f) {
3172     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3173   }
3174   PetscFunctionReturn(0);
3175 }
3176 
3177 #undef __FUNCT__
3178 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3179 /*@C
3180    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3181    (the default parallel PETSc format).  For good matrix assembly performance
3182    the user should preallocate the matrix storage by setting the parameters
3183    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3184    performance can be increased by more than a factor of 50.
3185 
3186    Collective on MPI_Comm
3187 
3188    Input Parameters:
3189 +  A - the matrix
3190 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3191            (same value is used for all local rows)
3192 .  d_nnz - array containing the number of nonzeros in the various rows of the
3193            DIAGONAL portion of the local submatrix (possibly different for each row)
3194            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3195            The size of this array is equal to the number of local rows, i.e 'm'.
3196            You must leave room for the diagonal entry even if it is zero.
3197 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3198            submatrix (same value is used for all local rows).
3199 -  o_nnz - array containing the number of nonzeros in the various rows of the
3200            OFF-DIAGONAL portion of the local submatrix (possibly different for
3201            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3202            structure. The size of this array is equal to the number
3203            of local rows, i.e 'm'.
3204 
3205    If the *_nnz parameter is given then the *_nz parameter is ignored
3206 
3207    The AIJ format (also called the Yale sparse matrix format or
3208    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3209    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3210 
3211    The parallel matrix is partitioned such that the first m0 rows belong to
3212    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3213    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3214 
3215    The DIAGONAL portion of the local submatrix of a processor can be defined
3216    as the submatrix obtained by extracting the part corresponding
3217    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3218    first row that belongs to the processor, and r2 is the last row belonging
3219    to this processor. This is a square mxm matrix. The remaining portion
3220    of the local submatrix (of size m x (N-m)) constitutes the OFF-DIAGONAL portion.
3221 
3222    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3223 
3224    You can call MatGetInfo() to get information on how effective the preallocation was;
3225    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3226    You can also run with the option -info and look for messages with the string
3227    malloc in them to see if additional memory allocation was needed.
3228 
3229    Example usage:
3230 
3231    Consider the following 8x8 matrix with 34 non-zero values, that is
3232    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3233    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3234    as follows:
3235 
3236 .vb
3237             1  2  0  |  0  3  0  |  0  4
3238     Proc0   0  5  6  |  7  0  0  |  8  0
3239             9  0 10  | 11  0  0  | 12  0
3240     -------------------------------------
3241            13  0 14  | 15 16 17  |  0  0
3242     Proc1   0 18  0  | 19 20 21  |  0  0
3243             0  0  0  | 22 23  0  | 24  0
3244     -------------------------------------
3245     Proc2  25 26 27  |  0  0 28  | 29  0
3246            30  0  0  | 31 32 33  |  0 34
3247 .ve
3248 
3249    This can be represented as a collection of submatrices as:
3250 
3251 .vb
3252       A B C
3253       D E F
3254       G H I
3255 .ve
3256 
3257    Where the submatrices A,B,C are owned by proc0, D,E,F are
3258    owned by proc1, G,H,I are owned by proc2.
3259 
3260    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3261    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3262    The 'M','N' parameters are 8,8, and have the same values on all procs.
3263 
3264    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3265    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3266    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3267    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3268    part as SeqAIJ matrices, e.g. proc1 will store [E] as one SeqAIJ
3269    matrix and [DF] as another SeqAIJ matrix.
3270 
3271    When d_nz, o_nz parameters are specified, d_nz storage elements are
3272    allocated for every row of the local diagonal submatrix, and o_nz
3273    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3274    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3275    per local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3276    In this case, the values of d_nz,o_nz are:
3277 .vb
3278      proc0 : dnz = 2, o_nz = 2
3279      proc1 : dnz = 3, o_nz = 2
3280      proc2 : dnz = 1, o_nz = 4
3281 .ve
3282    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3283    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3284    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3285    34 values.
3286 
3287    When d_nnz, o_nnz parameters are specified, the storage is specified
3288    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3289    In the above case the values for d_nnz,o_nnz are:
3290 .vb
3291      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3292      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3293      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3294 .ve
3295    Here the space allocated is the sum of all the above values, i.e. 34, and
3296    hence the preallocation is exact.
3297 
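   A minimal sketch for proc1 of the example above (a hedged illustration with
   hypothetical variable names, not a complete program):

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);     /* proc1 owns 3 rows and 3 "diagonal" columns */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve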
3298    Level: intermediate
3299 
3300 .keywords: matrix, aij, compressed row, sparse, parallel
3301 
3302 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3303           MPIAIJ, MatGetInfo()
3304 @*/
3305 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3306 {
3307   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3308 
3309   PetscFunctionBegin;
3310   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3311   if (f) {
3312     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3313   }
3314   PetscFunctionReturn(0);
3315 }
3316 
3317 #undef __FUNCT__
3318 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3319 /*@
3320      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3321          in standard CSR format.
3322 
3323    Collective on MPI_Comm
3324 
3325    Input Parameters:
3326 +  comm - MPI communicator
3327 .  m - number of local rows (Cannot be PETSC_DECIDE)
3328 .  n - This value should be the same as the local size used in creating the
3329        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3330        calculated if N is given) For square matrices n is almost always m.
3331 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3332 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3333 .   i - row indices
3334 .   j - column indices
3335 -   a - matrix values
3336 
3337    Output Parameter:
3338 .   mat - the matrix
3339 
3340    Level: intermediate
3341 
3342    Notes:
3343        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3344      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3345      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3346 
3347        The i and j indices are 0 based, and the i entries are offsets into the local j array.
3348 
3349        The format used for the sparse matrix input is equivalent to a
3350     row-major ordering, i.e. for the following matrix, the input data expected is
3351     as shown:
3352 
3353         1 0 0
3354         2 0 3     P0
3355        -------
3356         4 5 6     P1
3357 
3358      Process0 [P0]: rows_owned=[0,1]
3359         i =  {0,1,3}  [size = nrow+1  = 2+1]
3360         j =  {0,0,2}  [size = nz = 3]
3361         v =  {1,2,3}  [size = nz = 3]
3362 
3363      Process1 [P1]: rows_owned=[2]
3364         i =  {0,3}    [size = nrow+1  = 1+1]
3365         j =  {0,1,2}  [size = nz = 3]
3366         v =  {4,5,6}  [size = nz = 3]
3367 
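   A minimal sketch for process P0 of the example above (a hedged illustration
   with hypothetical sizes; each process passes its own local arrays):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
.ve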
3368 .keywords: matrix, aij, compressed row, sparse, parallel
3369 
3370 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3371           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3372 @*/
3373 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3374 {
3375   PetscErrorCode ierr;
3376 
3377   PetscFunctionBegin;
3378   if (i[0]) {
3379     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3380   }
3381   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3382   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3383   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3384   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3385   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3386   PetscFunctionReturn(0);
3387 }
3388 
3389 #undef __FUNCT__
3390 #define __FUNCT__ "MatCreateMPIAIJ"
3391 /*@C
3392    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3393    (the default parallel PETSc format).  For good matrix assembly performance
3394    the user should preallocate the matrix storage by setting the parameters
3395    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3396    performance can be increased by more than a factor of 50.
3397 
3398    Collective on MPI_Comm
3399 
3400    Input Parameters:
3401 +  comm - MPI communicator
3402 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3403            This value should be the same as the local size used in creating the
3404            y vector for the matrix-vector product y = Ax.
3405 .  n - This value should be the same as the local size used in creating the
3406        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3407        calculated if N is given) For square matrices n is almost always m.
3408 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3409 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3410 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3411            (same value is used for all local rows)
3412 .  d_nnz - array containing the number of nonzeros in the various rows of the
3413            DIAGONAL portion of the local submatrix (possibly different for each row)
3414            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3415            The size of this array is equal to the number of local rows, i.e 'm'.
3416            You must leave room for the diagonal entry even if it is zero.
3417 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3418            submatrix (same value is used for all local rows).
3419 -  o_nnz - array containing the number of nonzeros in the various rows of the
3420            OFF-DIAGONAL portion of the local submatrix (possibly different for
3421            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3422            structure. The size of this array is equal to the number
3423            of local rows, i.e 'm'.
3424 
3425    Output Parameter:
3426 .  A - the matrix
3427 
3428    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3429    MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
3430    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3431    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3432 
3433    Notes:
3434    If the *_nnz parameter is given then the *_nz parameter is ignored
3435 
3436    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3437    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3438    storage requirements for this matrix.
3439 
3440    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3441    processor then it must be used on all processors that share the object for
3442    that argument.
3443 
3444    The user MUST specify either the local or global matrix dimensions
3445    (possibly both).
3446 
3447    The parallel matrix is partitioned across processors such that the
3448    first m0 rows belong to process 0, the next m1 rows belong to
3449    process 1, the next m2 rows belong to process 2 etc., where
3450    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3451    values corresponding to an [m x N] submatrix.
3452 
3453    The columns are logically partitioned with the n0 columns belonging
3454    to 0th partition, the next n1 columns belonging to the next
3455    partition etc., where n0,n1,n2... are the input parameter 'n'.
3456 
3457    The DIAGONAL portion of the local submatrix on any given processor
3458    is the submatrix corresponding to the rows and columns m,n
3459    corresponding to the given processor. i.e diagonal matrix on
3460    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3461    etc. The remaining portion of the local submatrix [m x (N-n)]
3462    constitute the OFF-DIAGONAL portion. The example below better
3463    illustrates this concept.
3464 
3465    For a square global matrix we define each processor's diagonal portion
3466    to be its local rows and the corresponding columns (a square submatrix);
3467    each processor's off-diagonal portion encompasses the remainder of the
3468    local matrix (a rectangular submatrix).
3469 
3470    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3471 
3472    When calling this routine with a single process communicator, a matrix of
3473    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3474    type of communicator, use the construction mechanism:
3475      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3476 
3477    By default, this format uses inodes (identical nodes) when possible.
3478    We search for consecutive rows with the same nonzero structure, thereby
3479    reusing matrix information to achieve increased efficiency.
3480 
3481    Options Database Keys:
3482 +  -mat_no_inode  - Do not use inodes
3483 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3484 -  -mat_aij_oneindex - Internally use indexing starting at 1
3485         rather than 0.  Note that when calling MatSetValues(),
3486         the user still MUST index entries starting at 0!
3487 
3488 
3489    Example usage:
3490 
3491    Consider the following 8x8 matrix with 34 non-zero values, that is
3492    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3493    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3494    as follows:
3495 
3496 .vb
3497             1  2  0  |  0  3  0  |  0  4
3498     Proc0   0  5  6  |  7  0  0  |  8  0
3499             9  0 10  | 11  0  0  | 12  0
3500     -------------------------------------
3501            13  0 14  | 15 16 17  |  0  0
3502     Proc1   0 18  0  | 19 20 21  |  0  0
3503             0  0  0  | 22 23  0  | 24  0
3504     -------------------------------------
3505     Proc2  25 26 27  |  0  0 28  | 29  0
3506            30  0  0  | 31 32 33  |  0 34
3507 .ve
3508 
3509    This can be represented as a collection of submatrices as:
3510 
3511 .vb
3512       A B C
3513       D E F
3514       G H I
3515 .ve
3516 
3517    Where the submatrices A,B,C are owned by proc0, D,E,F are
3518    owned by proc1, G,H,I are owned by proc2.
3519 
3520    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3521    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3522    The 'M','N' parameters are 8,8, and have the same values on all procs.
3523 
3524    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3525    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3526    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3527    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3528    part as SeqAIJ matrices, e.g. proc1 will store [E] as one SeqAIJ
3529    matrix and [DF] as another SeqAIJ matrix.
3530 
3531    When d_nz, o_nz parameters are specified, d_nz storage elements are
3532    allocated for every row of the local diagonal submatrix, and o_nz
3533    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3534    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3535    per local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3536    In this case, the values of d_nz,o_nz are:
3537 .vb
3538      proc0 : dnz = 2, o_nz = 2
3539      proc1 : dnz = 3, o_nz = 2
3540      proc2 : dnz = 1, o_nz = 4
3541 .ve
3542    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3543    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3544    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3545    34 values.
3546 
3547    When d_nnz, o_nnz parameters are specified, the storage is specified
3548    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3549    In the above case the values for d_nnz,o_nnz are:
3550 .vb
3551      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3552      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3553      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3554 .ve
3555    Here the space allocated is the sum of all the above values, i.e. 34, and
3556    hence the preallocation is exact.
3557 
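   A minimal sketch for proc1 of the example above (a hedged illustration, not
   a complete program):

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve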
3558    Level: intermediate
3559 
3560 .keywords: matrix, aij, compressed row, sparse, parallel
3561 
3562 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3563           MPIAIJ, MatCreateMPIAIJWithArrays()
3564 @*/
3565 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3566 {
3567   PetscErrorCode ierr;
3568   PetscMPIInt    size;
3569 
3570   PetscFunctionBegin;
3571   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3572   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3573   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3574   if (size > 1) {
3575     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3576     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3577   } else {
3578     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3579     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3580   }
3581   PetscFunctionReturn(0);
3582 }
3583 
3584 #undef __FUNCT__
3585 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
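/*
   MatMPIAIJGetSeqAIJ - returns the pieces an MPIAIJ matrix is built from: the
   diagonal block Ad, the off-diagonal block Ao, and the colmap array that maps
   Ao's compressed (local) column numbers back to global column numbers. The
   returned objects are the matrix's internal data and must not be destroyed.
*/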
3586 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3587 {
3588   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3589 
3590   PetscFunctionBegin;
3591   *Ad     = a->A;
3592   *Ao     = a->B;
3593   *colmap = a->garray;
3594   PetscFunctionReturn(0);
3595 }
3596 
3597 #undef __FUNCT__
3598 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3599 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3600 {
3601   PetscErrorCode ierr;
3602   PetscInt       i;
3603   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3604 
3605   PetscFunctionBegin;
3606   if (coloring->ctype == IS_COLORING_GLOBAL) {
3607     ISColoringValue *allcolors,*colors;
3608     ISColoring      ocoloring;
3609 
3610     /* set coloring for diagonal portion */
3611     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3612 
3613     /* set coloring for off-diagonal portion */
3614     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3615     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3616     for (i=0; i<a->B->cmap.n; i++) {
3617       colors[i] = allcolors[a->garray[i]];
3618     }
3619     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3620     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3621     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3622     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3623   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3624     ISColoringValue *colors;
3625     PetscInt        *larray;
3626     ISColoring      ocoloring;
3627 
3628     /* set coloring for diagonal portion */
3629     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3630     for (i=0; i<a->A->cmap.n; i++) {
3631       larray[i] = i + A->cmap.rstart;
3632     }
3633     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3634     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3635     for (i=0; i<a->A->cmap.n; i++) {
3636       colors[i] = coloring->colors[larray[i]];
3637     }
3638     ierr = PetscFree(larray);CHKERRQ(ierr);
3639     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3640     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3641     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3642 
3643     /* set coloring for off-diagonal portion */
3644     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3645     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3646     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3647     for (i=0; i<a->B->cmap.n; i++) {
3648       colors[i] = coloring->colors[larray[i]];
3649     }
3650     ierr = PetscFree(larray);CHKERRQ(ierr);
3651     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3652     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3653     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3654   } else {
3655     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3656   }
3657 
3658   PetscFunctionReturn(0);
3659 }
3660 
3661 #if defined(PETSC_HAVE_ADIC)
3662 #undef __FUNCT__
3663 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3664 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3665 {
3666   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3667   PetscErrorCode ierr;
3668 
3669   PetscFunctionBegin;
3670   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3671   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3672   PetscFunctionReturn(0);
3673 }
3674 #endif
3675 
3676 #undef __FUNCT__
3677 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3678 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3679 {
3680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3681   PetscErrorCode ierr;
3682 
3683   PetscFunctionBegin;
3684   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3685   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3686   PetscFunctionReturn(0);
3687 }
3688 
3689 #undef __FUNCT__
3690 #define __FUNCT__ "MatMerge"
3691 /*@
3692       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
3693                  matrices from each processor
3694 
3695     Collective on MPI_Comm
3696 
3697    Input Parameters:
3698 +    comm - the communicator the parallel matrix will live on
3699 .    inmat - the input sequential matrix (one per process)
3700 .    n - number of local columns (or PETSC_DECIDE)
3701 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3702 
3703    Output Parameter:
3704 .    outmat - the parallel matrix generated
3705 
3706     Level: advanced
3707 
3708    Notes: The number of columns of the matrix on EACH processor MUST be the same.
3709           The input matrix inmat is destroyed by this routine.
3709 
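   A minimal calling sketch (a hedged illustration; seqA is a hypothetical
   assembled SeqAIJ matrix with the same number of columns on every process):

.vb
     Mat mpiA;
     MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);
.ve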
3710 @*/
3711 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3712 {
3713   PetscErrorCode ierr;
3714   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3715   PetscInt       *indx;
3716   PetscScalar    *values;
3717 
3718   PetscFunctionBegin;
3719   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3720   if (scall == MAT_INITIAL_MATRIX){
3721     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3722     if (n == PETSC_DECIDE){
3723       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3724     }
3725     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3726     rstart -= m;
3727 
3728     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3729     for (i=0;i<m;i++) {
3730       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3731       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3732       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3733     }
3734     /* This routine will ONLY return MPIAIJ type matrix */
3735     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3736     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3737     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3738     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3739     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3740 
3741   } else if (scall == MAT_REUSE_MATRIX){
3742     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3743   } else {
3744     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3745   }
3746 
3747   for (i=0;i<m;i++) {
3748     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3749     Ii    = i + rstart;
3750     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3751     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3752   }
3753   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3754   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3755   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3756 
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatFileSplit"
3762 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3763 {
3764   PetscErrorCode    ierr;
3765   PetscMPIInt       rank;
3766   PetscInt          m,N,i,rstart,nnz;
3767   size_t            len;
3768   const PetscInt    *indx;
3769   PetscViewer       out;
3770   char              *name;
3771   Mat               B;
3772   const PetscScalar *values;
3773 
3774   PetscFunctionBegin;
3775   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3776   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3777   /* Should this be the type of the diagonal block of A? */
3778   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3779   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3780   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3781   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3782   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3783   for (i=0;i<m;i++) {
3784     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3785     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3786     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3787   }
3788   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3789   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3790 
3791   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3792   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3793   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* leave room for the ".<rank>" suffix even for large ranks */
3794   sprintf(name,"%s.%d",outfile,rank);
3795   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3796   ierr = PetscFree(name);CHKERRQ(ierr);
3797   ierr = MatView(B,out);CHKERRQ(ierr);
3798   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3799   ierr = MatDestroy(B);CHKERRQ(ierr);
3800   PetscFunctionReturn(0);
3801 }
3802 
3803 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3804 #undef __FUNCT__
3805 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3806 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3807 {
3808   PetscErrorCode       ierr;
3809   Mat_Merge_SeqsToMPI  *merge;
3810   PetscContainer       container;
3811 
3812   PetscFunctionBegin;
3813   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3814   if (container) {
3815     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3816     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3817     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3818     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3819     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3820     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3821     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3822     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3823     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3824     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3825     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3826     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3827 
3828     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3829     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3830     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container was found */
3831   }
3832 
3833   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3834   PetscFunctionReturn(0);
3835 }
3836 
3837 #include "src/mat/utils/freespace.h"
3838 #include "petscbt.h"
3839 
3840 #undef __FUNCT__
3841 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3842 /*@C
3843       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
3844                  matrices from each processor
3845 
3846     Collective on MPI_Comm
3847 
3848    Input Parameters:
3849 +    comm - the communicator the parallel matrix will live on
3850 .    seqmat - the input sequential matrix (one per process)
3851 .    m - number of local rows (or PETSC_DECIDE)
3852 .    n - number of local columns (or PETSC_DECIDE)
3853 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3854 
3855    Output Parameter:
3856 .    mpimat - the parallel matrix generated
3857 
3858     Level: advanced
3859 
3860    Notes:
3861      The dimensions of the sequential matrix in each processor MUST be the same.
3862      The input seqmat is placed in the container "Mat_Merge_SeqsToMPI", and will be
3863      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
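
     A minimal sketch of the symbolic/numeric calling sequence (a hedged
     illustration; seqA is a hypothetical local SeqAIJ matrix):

.vb
     Mat mpiA;
     MatMerge_SeqsToMPISymbolic(PETSC_COMM_WORLD,seqA,m,n,&mpiA);
     MatMerge_SeqsToMPINumeric(seqA,mpiA);
.ve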
3864 @*/
3865 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3866 {
3867   PetscErrorCode       ierr;
3868   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3869   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3870   PetscMPIInt          size,rank,taga,*len_s;
3871   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3872   PetscInt             proc,m;
3873   PetscInt             **buf_ri,**buf_rj;
3874   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3875   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3876   MPI_Request          *s_waits,*r_waits;
3877   MPI_Status           *status;
3878   MatScalar            *aa=a->a;
3879   MatScalar            **abuf_r,*ba_i;
3880   Mat_Merge_SeqsToMPI  *merge;
3881   PetscContainer       container;
3882 
3883   PetscFunctionBegin;
3884   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3885 
3886   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3887   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3888 
3889   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3890   if (container) {
3891     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3892   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not hold a MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
3893   bi     = merge->bi;
3894   bj     = merge->bj;
3895   buf_ri = merge->buf_ri;
3896   buf_rj = merge->buf_rj;
3897 
3898   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3899   owners = merge->rowmap.range;
3900   len_s  = merge->len_s;
3901 
3902   /* send and recv matrix values */
3903   /*-----------------------------*/
3904   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3905   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3906 
3907   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3908   for (proc=0,k=0; proc<size; proc++){
3909     if (!len_s[proc]) continue;
3910     i = owners[proc];
3911     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3912     k++;
3913   }
3914 
3915   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3916   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3917   ierr = PetscFree(status);CHKERRQ(ierr);
3918 
3919   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3920   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3921 
3922   /* insert mat values of mpimat */
3923   /*----------------------------*/
3924   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
3925   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3926   nextrow = buf_ri_k + merge->nrecv;
3927   nextai  = nextrow + merge->nrecv;
3928 
3929   for (k=0; k<merge->nrecv; k++){
3930     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3931     nrows = *(buf_ri_k[k]);
3932     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3933     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3934   }
3935 
3936   /* set values of ba */
3937   m = merge->rowmap.n;
3938   for (i=0; i<m; i++) {
3939     arow = owners[rank] + i;
3940     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3941     bnzi = bi[i+1] - bi[i];
3942     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3943 
3944     /* add local non-zero vals of this proc's seqmat into ba */
3945     anzi = ai[arow+1] - ai[arow];
3946     aj   = a->j + ai[arow];
3947     aa   = a->a + ai[arow];
3948     nextaj = 0;
3949     for (j=0; nextaj<anzi; j++){
3950       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3951         ba_i[j] += aa[nextaj++];
3952       }
3953     }
3954 
3955     /* add received vals into ba */
3956     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3957       /* i-th row */
3958       if (i == *nextrow[k]) {
3959         anzi = *(nextai[k]+1) - *nextai[k];
3960         aj   = buf_rj[k] + *(nextai[k]);
3961         aa   = abuf_r[k] + *(nextai[k]);
3962         nextaj = 0;
3963         for (j=0; nextaj<anzi; j++){
3964           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3965             ba_i[j] += aa[nextaj++];
3966           }
3967         }
3968         nextrow[k]++; nextai[k]++;
3969       }
3970     }
3971     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3972   }
3973   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3974   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3975 
3976   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3977   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3978   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3979   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3980   PetscFunctionReturn(0);
3981 }
3982 
3983 #undef __FUNCT__
3984 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3985 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3986 {
3987   PetscErrorCode       ierr;
3988   Mat                  B_mpi;
3989   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3990   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3991   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3992   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3993   PetscInt             len,proc,*dnz,*onz;
3994   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3995   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3996   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3997   MPI_Status           *status;
3998   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3999   PetscBT              lnkbt;
4000   Mat_Merge_SeqsToMPI  *merge;
4001   PetscContainer       container;
4002 
4003   PetscFunctionBegin;
4004   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4005 
4006   /* make sure it is a PETSc comm */
4007   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4008   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4009   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4010 
4011   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4012   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4013 
4014   /* determine row ownership */
4015   /*---------------------------------------------------------*/
4016   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4017   merge->rowmap.n = m;
4018   merge->rowmap.N = M;
4019   merge->rowmap.bs = 1;
4020   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4021   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4022   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4023 
4024   m      = merge->rowmap.n;
4025   M      = merge->rowmap.N;
4026   owners = merge->rowmap.range;
4027 
4028   /* determine the number of messages to send, their lengths */
4029   /*---------------------------------------------------------*/
4030   len_s  = merge->len_s;
4031 
4032   len = 0;  /* length of buf_si[] */
4033   merge->nsend = 0;
4034   for (proc=0; proc<size; proc++){
4035     len_si[proc] = 0;
4036     if (proc == rank){
4037       len_s[proc] = 0;
4038     } else {
4039       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4040       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
4041     }
4042     if (len_s[proc]) {
4043       merge->nsend++;
4044       nrows = 0;
4045       for (i=owners[proc]; i<owners[proc+1]; i++){
4046         if (ai[i+1] > ai[i]) nrows++;
4047       }
4048       len_si[proc] = 2*(nrows+1);
4049       len += len_si[proc];
4050     }
4051   }
4052 
4053   /* determine the number and length of messages to receive for ij-structure */
4054   /*-------------------------------------------------------------------------*/
4055   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4056   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4057 
4058   /* post the Irecv of j-structure */
4059   /*-------------------------------*/
4060   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4061   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4062 
4063   /* post the Isend of j-structure */
4064   /*--------------------------------*/
4065   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4066   sj_waits = si_waits + merge->nsend;
4067 
4068   for (proc=0, k=0; proc<size; proc++){
4069     if (!len_s[proc]) continue;
4070     i = owners[proc];
4071     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4072     k++;
4073   }
4074 
4075   /* receives and sends of j-structure are complete */
4076   /*------------------------------------------------*/
4077   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4078   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4079 
4080   /* send and recv i-structure */
4081   /*---------------------------*/
4082   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4083   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4084 
4085   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4086   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4087   for (proc=0,k=0; proc<size; proc++){
4088     if (!len_s[proc]) continue;
4089     /* form outgoing message for i-structure:
4090          buf_si[0]:                 nrows to be sent
4091                [1:nrows]:           row index (local to [proc])
4092                [nrows+1:2*nrows+1]: i-structure index
4093     */
4094     /*-------------------------------------------*/
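    /* Illustrative example (hypothetical values, not taken from this code):
       if [proc] is sent 3 rows of which local rows 0 and 2 are nonempty with
       2 and 4 nonzeros respectively, then nrows = 2 and the message is
           buf_si = {2, 0, 2, 0, 2, 6}
       i.e. nrows, the nonempty local row indices {0,2}, then the i-structure
       {0,2,6} giving the offset of each row's column data within the j-message. */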
4095     nrows = len_si[proc]/2 - 1;
4096     buf_si_i    = buf_si + nrows+1;
4097     buf_si[0]   = nrows;
4098     buf_si_i[0] = 0;
4099     nrows = 0;
4100     for (i=owners[proc]; i<owners[proc+1]; i++){
4101       anzi = ai[i+1] - ai[i];
4102       if (anzi) {
4103         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4104         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4105         nrows++;
4106       }
4107     }
4108     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4109     k++;
4110     buf_si += len_si[proc];
4111   }
4112 
4113   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4114   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4115 
4116   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4117   for (i=0; i<merge->nrecv; i++){
4118     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4119   }
4120 
4121   ierr = PetscFree(len_si);CHKERRQ(ierr);
4122   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4123   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4124   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4125   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4126   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4127   ierr = PetscFree(status);CHKERRQ(ierr);
4128 
4129   /* compute a local seq matrix in each processor */
4130   /*----------------------------------------------*/
4131   /* allocate bi array and free space for accumulating nonzero column info */
4132   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4133   bi[0] = 0;
4134 
4135   /* create and initialize a linked list */
4136   nlnk = N+1;
4137   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4138 
4139   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4140   len  = ai[owners[rank+1]] - ai[owners[rank]];
4142   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4143   current_space = free_space;
4144 
4145   /* determine symbolic info for each local row */
4146   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt*),&buf_ri_k);CHKERRQ(ierr);
4147   nextrow = buf_ri_k + merge->nrecv;
4148   nextai  = nextrow + merge->nrecv;
4149   for (k=0; k<merge->nrecv; k++){
4150     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4151     nrows = *buf_ri_k[k];
4152     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4153     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4154   }
4155 
4156   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4157   len = 0;
4158   for (i=0;i<m;i++) {
4159     bnzi   = 0;
4160     /* add local non-zero cols of this proc's seqmat into lnk */
4161     arow   = owners[rank] + i;
4162     anzi   = ai[arow+1] - ai[arow];
4163     aj     = a->j + ai[arow];
4164     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4165     bnzi += nlnk;
4166     /* add received col data into lnk */
4167     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4168       if (i == *nextrow[k]) { /* i-th row */
4169         anzi = *(nextai[k]+1) - *nextai[k];
4170         aj   = buf_rj[k] + *nextai[k];
4171         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4172         bnzi += nlnk;
4173         nextrow[k]++; nextai[k]++;
4174       }
4175     }
4176     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4177 
4178     /* if free space is not available, make more free space */
4179     if (current_space->local_remaining<bnzi) {
4180       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
4181       nspacedouble++;
4182     }
4183     /* copy data into free space, then initialize lnk */
4184     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4185     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4186 
4187     current_space->array           += bnzi;
4188     current_space->local_used      += bnzi;
4189     current_space->local_remaining -= bnzi;
4190 
4191     bi[i+1] = bi[i] + bnzi;
4192   }
4193 
4194   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4195 
4196   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4197   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4198   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4199 
4200   /* create symbolic parallel matrix B_mpi */
4201   /*---------------------------------------*/
4202   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4203   if (n==PETSC_DECIDE) {
4204     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4205   } else {
4206     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4207   }
4208   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4209   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4210   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4211 
4212   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4213   B_mpi->assembled     = PETSC_FALSE;
4214   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4215   merge->bi            = bi;
4216   merge->bj            = bj;
4217   merge->buf_ri        = buf_ri;
4218   merge->buf_rj        = buf_rj;
4219   merge->coi           = PETSC_NULL;
4220   merge->coj           = PETSC_NULL;
4221   merge->owners_co     = PETSC_NULL;
4222 
4223   /* attach the supporting struct to B_mpi for reuse */
4224   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4225   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4226   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4227   *mpimat = B_mpi;
4228 
4229   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4230   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4231   PetscFunctionReturn(0);
4232 }
4233 
4234 #undef __FUNCT__
4235 #define __FUNCT__ "MatMerge_SeqsToMPI"
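/*
   A minimal usage sketch (illustrative, not taken from the library): every
   process supplies a MATSEQAIJ seqmat whose row/column sizes equal the global
   sizes; rows owned by other processes are shipped there and overlapping
   entries are summed. Here m and n are this process's local row and column sizes.

      Mat seqmat,mpimat;
      ... assemble seqmat as a MATSEQAIJ of the full global size ...
      ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ... change the numerical values of seqmat, keeping the same nonzero pattern ...
      ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
      ierr = MatDestroy(mpimat);CHKERRQ(ierr);
*/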
4236 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4237 {
4238   PetscErrorCode   ierr;
4239 
4240   PetscFunctionBegin;
4241   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4242   if (scall == MAT_INITIAL_MATRIX){
4243     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4244   }
4245   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4246   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4247   PetscFunctionReturn(0);
4248 }
4249 
4250 #undef __FUNCT__
4251 #define __FUNCT__ "MatGetLocalMat"
4252 /*@
4253      MatGetLocalMat - Creates a SeqAIJ matrix containing all of A's local rows
4254 
4255     Not Collective
4256 
4257    Input Parameters:
4258 +    A - the matrix
4259 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4260 
4261    Output Parameter:
4262 .    A_loc - the local sequential matrix generated
4263 
4264     Level: developer
4265 
4266 @*/
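/*
   A minimal usage sketch (illustrative; A is an assembled MATMPIAIJ). The
   result is a MATSEQAIJ with A->rmap.n rows and A->cmap.N columns; the
   MAT_REUSE_MATRIX call only refreshes the numerical values.

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/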
4267 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4268 {
4269   PetscErrorCode  ierr;
4270   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4271   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4272   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4273   MatScalar       *aa=a->a,*ba=b->a,*cam;
4274   PetscScalar     *ca;
4275   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
4276   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4277 
4278   PetscFunctionBegin;
4279   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4280   if (scall == MAT_INITIAL_MATRIX){
4281     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4282     ci[0] = 0;
4283     for (i=0; i<am; i++){
4284       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4285     }
4286     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4287     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4288     k = 0;
4289     for (i=0; i<am; i++) {
4290       ncols_o = bi[i+1] - bi[i];
4291       ncols_d = ai[i+1] - ai[i];
4292       /* off-diagonal portion of A */
4293       for (jo=0; jo<ncols_o; jo++) {
4294         col = cmap[*bj];
4295         if (col >= cstart) break;
4296         cj[k]   = col; bj++;
4297         ca[k++] = *ba++;
4298       }
4299       /* diagonal portion of A */
4300       for (j=0; j<ncols_d; j++) {
4301         cj[k]   = cstart + *aj++;
4302         ca[k++] = *aa++;
4303       }
4304       /* off-diagonal portion of A */
4305       for (j=jo; j<ncols_o; j++) {
4306         cj[k]   = cmap[*bj++];
4307         ca[k++] = *ba++;
4308       }
4309     }
4310     /* put together the new matrix */
4311     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4312     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4313     /* Since these are PETSc arrays, change flags to free them as necessary. */
4314     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4315     mat->free_a  = PETSC_TRUE;
4316     mat->free_ij = PETSC_TRUE;
4317     mat->nonew   = 0;
4318   } else if (scall == MAT_REUSE_MATRIX){
4319     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4320     ci = mat->i; cj = mat->j; cam = mat->a;
4321     for (i=0; i<am; i++) {
4322       /* off-diagonal portion of A */
4323       ncols_o = bi[i+1] - bi[i];
4324       for (jo=0; jo<ncols_o; jo++) {
4325         col = cmap[*bj];
4326         if (col >= cstart) break;
4327         *cam++ = *ba++; bj++;
4328       }
4329       /* diagonal portion of A */
4330       ncols_d = ai[i+1] - ai[i];
4331       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4332       /* off-diagonal portion of A */
4333       for (j=jo; j<ncols_o; j++) {
4334         *cam++ = *ba++; bj++;
4335       }
4336     }
4337   } else {
4338     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4339   }
4340 
4341   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4342   PetscFunctionReturn(0);
4343 }
4344 
4345 #undef __FUNCT__
4346 #define __FUNCT__ "MatGetLocalMatCondensed"
4347 /*@C
4348      MatGetLocalMatCondensed - Creates a SeqAIJ matrix containing all of A's local rows and only its NON-ZERO columns
4349 
4350     Not Collective
4351 
4352    Input Parameters:
4353 +    A - the matrix
4354 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4355 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4356 
4357    Output Parameter:
4358 .    A_loc - the local sequential matrix generated
4359 
4360     Level: developer
4361 
4362 @*/
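/*
   A minimal usage sketch (illustrative). With row = col = PETSC_NULL the
   routine builds the index sets itself: all local rows, plus the local
   columns and the off-process columns referenced by the off-diagonal block.

      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ...
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/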
4363 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4364 {
4365   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4366   PetscErrorCode    ierr;
4367   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4368   IS                isrowa,iscola;
4369   Mat               *aloc;
4370 
4371   PetscFunctionBegin;
4372   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4373   if (!row){
4374     start = A->rmap.rstart; end = A->rmap.rend;
4375     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4376   } else {
4377     isrowa = *row;
4378   }
4379   if (!col){
4380     start = A->cmap.rstart;
4381     cmap  = a->garray;
4382     nzA   = a->A->cmap.n;
4383     nzB   = a->B->cmap.n;
4384     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4385     ncols = 0;
4386     for (i=0; i<nzB; i++) {
4387       if (cmap[i] < start) idx[ncols++] = cmap[i];
4388       else break;
4389     }
4390     imark = i;
4391     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4392     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4393     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4394     ierr = PetscFree(idx);CHKERRQ(ierr);
4395   } else {
4396     iscola = *col;
4397   }
4398   if (scall != MAT_INITIAL_MATRIX){
4399     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4400     aloc[0] = *A_loc;
4401   }
4402   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4403   *A_loc = aloc[0];
4404   ierr = PetscFree(aloc);CHKERRQ(ierr);
4405   if (!row){
4406     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4407   }
4408   if (!col){
4409     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4410   }
4411   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4412   PetscFunctionReturn(0);
4413 }
4414 
4415 #undef __FUNCT__
4416 #define __FUNCT__ "MatGetBrowsOfAcols"
4417 /*@C
4418     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of local A
4419 
4420     Collective on Mat
4421 
4422    Input Parameters:
4423 +    A,B - the matrices in mpiaij format
4424 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4425 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4426 
4427    Output Parameters:
4428 +    rowb, colb - index sets of rows and columns of B to extract
4429 .    brstart - row index in B_seq at which B's local rows begin (the next B->rmap.n rows of B_seq are taken from B's local rows)
4430 -    B_seq - the sequential matrix generated
4431 
4432     Level: developer
4433 
4434 @*/
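/*
   A minimal usage sketch (illustrative), in the style of the parallel
   matrix-matrix multiply kernels that use this routine: the first call
   creates the index sets and B_seq; a later MAT_REUSE_MATRIX call refreshes
   the numerical values with the same structure.

      IS       rowb,colb;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/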
4435 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4436 {
4437   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4438   PetscErrorCode    ierr;
4439   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4440   IS                isrowb,iscolb;
4441   Mat               *bseq;
4442 
4443   PetscFunctionBegin;
4444   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4445     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4446   }
4447   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4448 
4449   if (scall == MAT_INITIAL_MATRIX){
4450     start = A->cmap.rstart;
4451     cmap  = a->garray;
4452     nzA   = a->A->cmap.n;
4453     nzB   = a->B->cmap.n;
4454     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4455     ncols = 0;
4456     for (i=0; i<nzB; i++) {  /* row < local row index */
4457       if (cmap[i] < start) idx[ncols++] = cmap[i];
4458       else break;
4459     }
4460     imark = i;
4461     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4462     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4463     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4464     ierr = PetscFree(idx);CHKERRQ(ierr);
4465     *brstart = imark;
4466     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4467   } else {
4468     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4469     isrowb = *rowb; iscolb = *colb;
4470     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4471     bseq[0] = *B_seq;
4472   }
4473   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4474   *B_seq = bseq[0];
4475   ierr = PetscFree(bseq);CHKERRQ(ierr);
4476   if (!rowb){
4477     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4478   } else {
4479     *rowb = isrowb;
4480   }
4481   if (!colb){
4482     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4483   } else {
4484     *colb = iscolb;
4485   }
4486   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4487   PetscFunctionReturn(0);
4488 }
4489 
4490 #undef __FUNCT__
4491 #define __FUNCT__ "MatGetBrowsOfAoCols"
4492 /*@C
4493     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4494     of the OFF-DIAGONAL portion of local A
4495 
4496     Collective on Mat
4497 
4498    Input Parameters:
4499 +    A,B - the matrices in mpiaij format
4500 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4501 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4502 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4503 
4504    Output Parameter:
4505 .    B_oth - the sequential matrix generated
4506 
4507     Level: developer
4508 
4509 @*/
4510 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4511 {
4512   VecScatter_MPI_General *gen_to,*gen_from;
4513   PetscErrorCode         ierr;
4514   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4515   Mat_SeqAIJ             *b_oth;
4516   VecScatter             ctx=a->Mvctx;
4517   MPI_Comm               comm=((PetscObject)ctx)->comm;
4518   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4519   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4520   PetscScalar            *rvalues,*svalues;
4521   MatScalar              *b_otha,*bufa,*bufA;
4522   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4523   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4524   MPI_Status             *sstatus,rstatus;
4525   PetscMPIInt            jj;
4526   PetscInt               *cols,sbs,rbs;
4527   PetscScalar            *vals;
4528 
4529   PetscFunctionBegin;
4530   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4531     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4532   }
4533   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4534   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4535 
4536   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4537   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4538   rvalues  = gen_from->values; /* reused as workspace for the lengths of the rows to be received */
4539   svalues  = gen_to->values;   /* reused as workspace for the lengths of the rows to be sent */
4540   nrecvs   = gen_from->n;
4541   nsends   = gen_to->n;
4542 
4543   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4544   srow     = gen_to->indices;   /* local row index to be sent */
4545   sstarts  = gen_to->starts;
4546   sprocs   = gen_to->procs;
4547   sstatus  = gen_to->sstatus;
4548   sbs      = gen_to->bs;
4549   rstarts  = gen_from->starts;
4550   rprocs   = gen_from->procs;
4551   rbs      = gen_from->bs;
4552 
4553   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4554   if (scall == MAT_INITIAL_MATRIX){
4555     /* i-array */
4556     /*---------*/
4557     /*  post receives */
4558     for (i=0; i<nrecvs; i++){
4559       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4560       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4561       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4562     }
4563 
4564     /* pack the outgoing message */
4565     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4566     rstartsj = sstartsj + nsends +1;
4567     sstartsj[0] = 0;  rstartsj[0] = 0;
4568     len = 0; /* total length of j or a array to be sent */
4569     k = 0;
4570     for (i=0; i<nsends; i++){
4571       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4572       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4573       for (j=0; j<nrows; j++) {
4574         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4575         for (l=0; l<sbs; l++){
4576           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4577           rowlen[j*sbs+l] = ncols;
4578           len += ncols;
4579           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4580         }
4581         k++;
4582       }
4583       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4584       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4585     }
4586     /* recvs and sends of i-array are completed */
4587     i = nrecvs;
4588     while (i--) {
4589       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4590     }
4591     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4592 
4593     /* allocate buffers for sending j and a arrays */
4594     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4595     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4596 
4597     /* create i-array of B_oth */
4598     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4599     b_othi[0] = 0;
4600     len = 0; /* total length of j or a array to be received */
4601     k = 0;
4602     for (i=0; i<nrecvs; i++){
4603       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4604       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4605       for (j=0; j<nrows; j++) {
4606         b_othi[k+1] = b_othi[k] + rowlen[j];
4607         len += rowlen[j]; k++;
4608       }
4609       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4610     }
4611 
4612     /* allocate space for j and a arrays of B_oth */
4613     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4614     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4615 
4616     /* j-array */
4617     /*---------*/
4618     /*  post receives of j-array */
4619     for (i=0; i<nrecvs; i++){
4620       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4621       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4622     }
4623 
4624     /* pack the outgoing message j-array */
4625     k = 0;
4626     for (i=0; i<nsends; i++){
4627       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4628       bufJ = bufj+sstartsj[i];
4629       for (j=0; j<nrows; j++) {
4630         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4631         for (ll=0; ll<sbs; ll++){
4632           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4633           for (l=0; l<ncols; l++){
4634             *bufJ++ = cols[l];
4635           }
4636           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4637         }
4638       }
4639       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4640     }
4641 
4642     /* recvs and sends of j-array are completed */
4643     i = nrecvs;
4644     while (i--) {
4645       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4646     }
4647     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4648   } else if (scall == MAT_REUSE_MATRIX){
4649     sstartsj = *startsj;
4650     rstartsj = sstartsj + nsends +1;
4651     bufa     = *bufa_ptr;
4652     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4653     b_otha   = b_oth->a;
4654   } else {
4655     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4656   }
4657 
4658   /* a-array */
4659   /*---------*/
4660   /*  post receives of a-array */
4661   for (i=0; i<nrecvs; i++){
4662     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4663     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4664   }
4665 
4666   /* pack the outgoing message a-array */
4667   k = 0;
4668   for (i=0; i<nsends; i++){
4669     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4670     bufA = bufa+sstartsj[i];
4671     for (j=0; j<nrows; j++) {
4672       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4673       for (ll=0; ll<sbs; ll++){
4674         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4675         for (l=0; l<ncols; l++){
4676           *bufA++ = vals[l];
4677         }
4678         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4679       }
4680     }
4681     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4682   }
4683   /* recvs and sends of a-array are completed */
4684   i = nrecvs;
4685   while (i--) {
4686     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4687   }
4688   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4689   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4690 
4691   if (scall == MAT_INITIAL_MATRIX){
4692     /* put together the new matrix */
4693     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4694 
4695     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4696     /* Since these are PETSc arrays, change flags to free them as necessary. */
4697     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4698     b_oth->free_a  = PETSC_TRUE;
4699     b_oth->free_ij = PETSC_TRUE;
4700     b_oth->nonew   = 0;
4701 
4702     ierr = PetscFree(bufj);CHKERRQ(ierr);
4703     if (!startsj || !bufa_ptr){
4704       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4705       ierr = PetscFree(bufa);CHKERRQ(ierr);
4706     } else {
4707       *startsj  = sstartsj;
4708       *bufa_ptr = bufa;
4709     }
4710   }
4711   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4712   PetscFunctionReturn(0);
4713 }
4714 
4715 #undef __FUNCT__
4716 #define __FUNCT__ "MatGetCommunicationStructs"
4717 /*@C
4718   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4719 
4720   Not Collective
4721 
4722   Input Parameter:
4723 . A - The matrix in mpiaij format
4724 
4725   Output Parameters:
4726 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4727 . colmap - A map from global column index to local index into lvec
4728 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4729 
4730   Level: developer
4731 
4732 @*/
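/*
   A minimal usage sketch (illustrative; with PETSC_USE_CTABLE the colmap
   argument is a PetscTable* instead). The returned objects are the ones A
   uses internally for MatMult(); they are owned by A and must not be
   destroyed by the caller.

      Vec        lvec;
      PetscInt   *colmap;
      VecScatter Mvctx;
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/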
4733 #if defined (PETSC_USE_CTABLE)
4734 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4735 #else
4736 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4737 #endif
4738 {
4739   Mat_MPIAIJ *a;
4740 
4741   PetscFunctionBegin;
4742   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4743   PetscValidPointer(lvec, 2);
4744   PetscValidPointer(colmap, 3);
4745   PetscValidPointer(multScatter, 4);
4746   a = (Mat_MPIAIJ *) A->data;
4747   if (lvec) *lvec = a->lvec;
4748   if (colmap) *colmap = a->colmap;
4749   if (multScatter) *multScatter = a->Mvctx;
4750   PetscFunctionReturn(0);
4751 }
4752 
4753 EXTERN_C_BEGIN
4754 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4755 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4756 EXTERN_C_END
4757 
4758 #include "src/mat/impls/dense/mpi/mpidense.h"
4759 
4760 #undef __FUNCT__
4761 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4762 /*
4763     Computes C = A*B as (B'*A')', since computing the product directly with a dense A is untenable; the transposed product B'*A' is an MPIAIJ*MPIDense multiply, which is supported
4764 
4765                n                       p                          p
4766         (              )       (              )         (                  )
4767       m (      A       )  *  n (       B      )   =   m (         C        )
4768         (              )       (              )         (                  )
4769 
4770 */
4771 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4772 {
4773   PetscErrorCode     ierr;
4774   Mat                At,Bt,Ct;
4775 
4776   PetscFunctionBegin;
4777   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4778   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4779   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4780   ierr = MatDestroy(At);CHKERRQ(ierr);
4781   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4782   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4783   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4784   PetscFunctionReturn(0);
4785 }
4786 
4787 #undef __FUNCT__
4788 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4789 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4790 {
4791   PetscErrorCode ierr;
4792   PetscInt       m=A->rmap.n,n=B->cmap.n;
4793   Mat            Cmat;
4794 
4795   PetscFunctionBegin;
4796   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %D != B->rmap.n %D",A->cmap.n,B->rmap.n);
4797   ierr = MatCreate(A->hdr.comm,&Cmat);CHKERRQ(ierr);
4798   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4799   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4800   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4801   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4802   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4803   *C   = Cmat;
4804   PetscFunctionReturn(0);
4805 }
4806 
4807 /* ----------------------------------------------------------------*/
4808 #undef __FUNCT__
4809 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4810 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4811 {
4812   PetscErrorCode ierr;
4813 
4814   PetscFunctionBegin;
4815   if (scall == MAT_INITIAL_MATRIX){
4816     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4817   }
4818   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4819   PetscFunctionReturn(0);
4820 }
4821 
4822 /*MC
4823    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4824 
4825    Options Database Keys:
4826 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4827 
4828   Level: beginner
4829 
4830 .seealso: MatCreateMPIAIJ()
4831 M*/
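/*
   A minimal creation sketch for this type (illustrative local sizes m,n and
   preallocation counts; see MatMPIAIJSetPreallocation() for their meaning):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/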
4832 
4833 EXTERN_C_BEGIN
4834 #undef __FUNCT__
4835 #define __FUNCT__ "MatCreate_MPIAIJ"
4836 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4837 {
4838   Mat_MPIAIJ     *b;
4839   PetscErrorCode ierr;
4840   PetscMPIInt    size;
4841 
4842   PetscFunctionBegin;
4843   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4844 
4845   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4846   B->data         = (void*)b;
4847   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4848   B->factor       = 0;
4849   B->rmap.bs      = 1;
4850   B->assembled    = PETSC_FALSE;
4851   B->mapping      = 0;
4852 
4853   B->insertmode      = NOT_SET_VALUES;
4854   b->size            = size;
4855   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4856 
4857   /* build cache for off array entries formed */
4858   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4859   b->donotstash  = PETSC_FALSE;
4860   b->colmap      = 0;
4861   b->garray      = 0;
4862   b->roworiented = PETSC_TRUE;
4863 
4864   /* stuff used for matrix vector multiply */
4865   b->lvec      = PETSC_NULL;
4866   b->Mvctx     = PETSC_NULL;
4867 
4868   /* stuff for MatGetRow() */
4869   b->rowindices   = 0;
4870   b->rowvalues    = 0;
4871   b->getrowactive = PETSC_FALSE;
4872 
4873 
4874   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4875                                      "MatStoreValues_MPIAIJ",
4876                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4877   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4878                                      "MatRetrieveValues_MPIAIJ",
4879                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4880   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4881 				     "MatGetDiagonalBlock_MPIAIJ",
4882                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4883   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4884 				     "MatIsTranspose_MPIAIJ",
4885 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4886   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4887 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4888 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4889   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4890 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4891 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4892   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4893 				     "MatDiagonalScaleLocal_MPIAIJ",
4894 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4895   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4896                                      "MatConvert_MPIAIJ_MPICSRPERM",
4897                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4898   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4899                                      "MatConvert_MPIAIJ_MPICRL",
4900                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4901   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
4902                                      "MatMatMult_MPIDense_MPIAIJ",
4903                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
4904   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
4905                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
4906                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
4907   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
4908                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
4909                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
4910   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4911   PetscFunctionReturn(0);
4912 }
4913 EXTERN_C_END
4914 
4915 #undef __FUNCT__
4916 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4917 /*@
4918      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4919          and "off-diagonal" part of the matrix in CSR format.
4920 
4921    Collective on MPI_Comm
4922 
4923    Input Parameters:
4924 +  comm - MPI communicator
4925 .  m - number of local rows (Cannot be PETSC_DECIDE)
4926 .  n - This value should be the same as the local size used in creating the
4927        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4928        calculated if N is given). For square matrices n is almost always m.
4929 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4930 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4931 .   i - row indices for "diagonal" portion of matrix
4932 .   j - column indices
4933 .   a - matrix values
4934 .   oi - row indices for "off-diagonal" portion of matrix
4935 .   oj - column indices
4936 -   oa - matrix values
4937 
4938    Output Parameter:
4939 .   mat - the matrix
4940 
4941    Level: advanced
4942 
4943    Notes:
4944        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4945 
4946        The i and j indices are 0 based
4947 
4948        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4949 
4950 
4951 .keywords: matrix, aij, compressed row, sparse, parallel
4952 
4953 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4954           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4955 @*/
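/*
   A minimal usage sketch (illustrative data; run on a single process, so the
   "off-diagonal" part is empty and oj/oa are never dereferenced):

      PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
      PetscScalar a[]  = {1.0,2.0};
      PetscInt    oi[] = {0,0,0};
      Mat         A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,2,2,i,j,a,oi,
                                            PETSC_NULL,PETSC_NULL,&A);CHKERRQ(ierr);

   The arrays must remain valid for the life of A since they are not copied.
*/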
4956 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4957 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4958 {
4959   PetscErrorCode ierr;
4960   Mat_MPIAIJ     *maij;
4961 
4962   PetscFunctionBegin;
4963   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4964   if (i[0]) {
4965     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4966   }
4967   if (oi[0]) {
4968     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4969   }
4970   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4971   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4972   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4973   maij = (Mat_MPIAIJ*) (*mat)->data;
4974   maij->donotstash     = PETSC_TRUE;
4975   (*mat)->preallocated = PETSC_TRUE;
4976 
4977   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4978   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4979   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4980 
4981   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4982   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4983 
4984   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4985   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4986   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4987   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4988 
4989   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4990   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4991   PetscFunctionReturn(0);
4992 }
4993 
4994 /*
4995     Special version for direct calls from Fortran
4996 */
4997 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4998 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4999 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5000 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5001 #endif
5002 
5003 /* Change these macros so they can be used in a void function */
5004 #undef CHKERRQ
5005 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5006 #undef SETERRQ2
5007 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5008 #undef SETERRQ
5009 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5010 
5011 EXTERN_C_BEGIN
5012 #undef __FUNCT__
5013 #define __FUNCT__ "matsetvaluesmpiaij_"
5014 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5015 {
5016   Mat             mat = *mmat;
5017   PetscInt        m = *mm, n = *mn;
5018   InsertMode      addv = *maddv;
5019   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5020   PetscScalar     value;
5021   PetscErrorCode  ierr;
5022 
5023   MatPreallocated(mat);
5024   if (mat->insertmode == NOT_SET_VALUES) {
5025     mat->insertmode = addv;
5026   }
5027 #if defined(PETSC_USE_DEBUG)
5028   else if (mat->insertmode != addv) {
5029     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5030   }
5031 #endif
5032   {
5033   PetscInt        i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
5034   PetscInt        cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
5035   PetscTruth      roworiented = aij->roworiented;
5036 
5037   /* Some Variables required in the macro */
5038   Mat             A = aij->A;
5039   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5040   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5041   MatScalar       *aa = a->a;
5042   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5043   Mat             B = aij->B;
5044   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5045   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
5046   MatScalar       *ba = b->a;
5047 
5048   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5049   PetscInt        nonew = a->nonew;
5050   MatScalar       *ap1,*ap2;
5051 
5052   PetscFunctionBegin;
5053   for (i=0; i<m; i++) {
5054     if (im[i] < 0) continue;
5055 #if defined(PETSC_USE_DEBUG)
5056     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
5057 #endif
5058     if (im[i] >= rstart && im[i] < rend) {
5059       row      = im[i] - rstart;
5060       lastcol1 = -1;
5061       rp1      = aj + ai[row];
5062       ap1      = aa + ai[row];
5063       rmax1    = aimax[row];
5064       nrow1    = ailen[row];
5065       low1     = 0;
5066       high1    = nrow1;
5067       lastcol2 = -1;
5068       rp2      = bj + bi[row];
5069       ap2      = ba + bi[row];
5070       rmax2    = bimax[row];
5071       nrow2    = bilen[row];
5072       low2     = 0;
5073       high2    = nrow2;
5074 
5075       for (j=0; j<n; j++) {
5076         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5077         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5078         if (in[j] >= cstart && in[j] < cend){
5079           col = in[j] - cstart;
5080           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5081         } else if (in[j] < 0) continue;
5082 #if defined(PETSC_USE_DEBUG)
5083         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
5084 #endif
5085         else {
5086           if (mat->was_assembled) {
5087             if (!aij->colmap) {
5088               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5089             }
5090 #if defined (PETSC_USE_CTABLE)
5091             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5092 	    col--;
5093 #else
5094             col = aij->colmap[in[j]] - 1;
5095 #endif
5096             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5097               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5098               col =  in[j];
5099               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5100               B = aij->B;
5101               b = (Mat_SeqAIJ*)B->data;
5102               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5103               rp2      = bj + bi[row];
5104               ap2      = ba + bi[row];
5105               rmax2    = bimax[row];
5106               nrow2    = bilen[row];
5107               low2     = 0;
5108               high2    = nrow2;
5109               bm       = aij->B->rmap.n;
5110               ba = b->a;
5111             }
5112           } else col = in[j];
5113           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5114         }
5115       }
5116     } else {
5117       if (!aij->donotstash) {
5118         if (roworiented) {
5119           if (ignorezeroentries && v[i*n] == 0.0) continue;
5120           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5121         } else {
5122           if (ignorezeroentries && v[i] == 0.0) continue;
5123           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5124         }
5125       }
5126     }
5127   }}
5128   PetscFunctionReturnVoid();
5129 }
5130 EXTERN_C_END
5131 
5132