xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8cda6cd7047ed8bb2d68b8e371c012553cb590ce)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
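/*
    Example usage (a sketch only, not taken from this file; the names gmat, mlocal and mdist
    are illustrative): each process passes the number of rows it is to own in m, and the
    SeqAIJ matrix gmat need only be meaningful on process 0.

       Mat      gmat,mdist;
       PetscInt mlocal;
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_INITIAL_MATRIX,&mdist);CHKERRQ(ierr);
       ....  later, reuse the same nonzero structure but refresh the values from process 0
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_REUSE_MATRIX,&mdist);CHKERRQ(ierr);
*/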
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal entries in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal entries in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap.n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap.n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap.n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it it is not scalable (each processor
181 has an order N integer array but is fast to acess.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap.n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
207 #define CHUNKSIZE   15
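/*
    The two macros below insert (or add) a single value at (row,col) of the local diagonal (A)
    or off-diagonal (B) SeqAIJ block.  rp1/rp2 point to the row's column indices and ap1/ap2 to
    its values; the column is located with a bisection step followed by a linear scan, and if it
    is not already present the later entries of the row are shifted up one slot (reallocating via
    MatSeqXAIJReallocateAIJ() when the row is full) before the new entry is stored.
*/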
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
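  /* v is assumed to hold one complete local row in increasing global column order:
     the off-diagonal entries left of the diagonal block, then the diagonal block
     entries, then the remaining off-diagonal entries */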
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
311   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some Variables required in the macro */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap.n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
415   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled; if so, we must
517      also disassemble ourselves so that we can reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap.range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of number of messages and max length*/
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
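  /* worked example (hypothetical): with 3 processes owning rows [0,4), [4,8), [8,12) and
     rows[] = {7,2,9,3}, the counts are nprocs[0] = 2, nprocs[2] = 1, nprocs[4] = 1, so
     starts[] = {0,2,3} and the packing loop below produces svalues[] = {2,3,7,9}, i.e. the
     rows grouped by destination process, with the block for process i beginning at the
     original starts[i] */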
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal entry, which would
674      allocate new memory and trigger many mallocs, slowing things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap.n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts: note this assumes the values are not actually */
764     /* added into yy until the VecScatterEnd() on the next line */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* values actually were received in the Begin() but we need to call this nop */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap.N;
932     header[2] = mat->cmap.N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap.n;
938     range = mat->rmap.range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap.n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap.n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
968   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
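  /* for each local row, emit the off-diagonal columns that lie before the diagonal block,
     then the diagonal block columns shifted to global numbering, then the remaining
     off-diagonal columns, so each row's indices come out in increasing global order */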
972   for (i=0; i<mat->rmap.n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap.n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");
1114 
1115     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1116     if (!rank) {
1117       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1118     } else {
1119       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1120     }
1121     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1122     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1123     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1124     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1125 
1126     /* copy over the A part */
1127     Aloc = (Mat_SeqAIJ*)aij->A->data;
1128     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1129     row = mat->rmap.rstart;
1130     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
1131     for (i=0; i<m; i++) {
1132       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1133       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1134     }
1135     aj = Aloc->j;
1136     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
1137 
1138     /* copy over the B part */
1139     Aloc = (Mat_SeqAIJ*)aij->B->data;
1140     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1141     row  = mat->rmap.rstart;
1142     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1143     ct   = cols;
1144     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1148     }
1149     ierr = PetscFree(ct);CHKERRQ(ierr);
1150     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1151     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1152     /*
1153        Everyone has to call to draw the matrix since the graphics waits are
1154        synchronized across all processors that share the PetscDraw object
1155     */
1156     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1157     if (!rank) {
1158       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1159       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1160     }
1161     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1162     ierr = MatDestroy(A);CHKERRQ(ierr);
1163   }
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatView_MPIAIJ"
1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1170 {
1171   PetscErrorCode ierr;
1172   PetscTruth     iascii,isdraw,issocket,isbinary;
1173 
1174   PetscFunctionBegin;
1175   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1176   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1177   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1178   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1179   if (iascii || isdraw || isbinary || issocket) {
1180     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1181   } else {
1182     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatRelax_MPIAIJ"
1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1190 {
1191   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1192   PetscErrorCode ierr;
1193   Vec            bb1;
1194 
1195   PetscFunctionBegin;
1196   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1197 
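  /* each outer iteration below scatters the current x to obtain the ghost values x_off,
     forms the modified right hand side bb1 = bb - B*x_off, and then applies the requested
     local SOR sweep(s) with the diagonal block A to update the owned part of x */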
1198   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1199     if (flag & SOR_ZERO_INITIAL_GUESS) {
1200       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1201       its--;
1202     }
1203 
1204     while (its--) {
1205       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1207 
1208       /* update rhs: bb1 = bb - B*x */
1209       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1210       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1211 
1212       /* local sweep */
1213       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1214     }
1215   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1216     if (flag & SOR_ZERO_INITIAL_GUESS) {
1217       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1218       its--;
1219     }
1220     while (its--) {
1221       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1222       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223 
1224       /* update rhs: bb1 = bb - B*x */
1225       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1226       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1227 
1228       /* local sweep */
1229       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1230     }
1231   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1232     if (flag & SOR_ZERO_INITIAL_GUESS) {
1233       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1234       its--;
1235     }
1236     while (its--) {
1237       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239 
1240       /* update rhs: bb1 = bb - B*x */
1241       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1242       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1243 
1244       /* local sweep */
1245       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1246     }
1247   } else {
1248     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1249   }
1250 
1251   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatPermute_MPIAIJ"
1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1258 {
1259   MPI_Comm       comm,pcomm;
1260   PetscInt       first,local_size,nrows,*rows;
1261   int            ntids;
1262   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1267   /* make a collective version of 'rowp' */
1268   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1269   if (pcomm==comm) {
1270     crowp = rowp;
1271   } else {
1272     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1273     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1274     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1275     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1276   }
1277   /* collect the global row permutation and invert it */
1278   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1279   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1280   if (pcomm!=comm) {
1281     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1282   }
1283   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1284   /* get the local target indices */
1285   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1286   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1287   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1288   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1289   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1290   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1291   /* the column permutation is so much easier;
1292      make a local version of 'colp' and invert it */
1293   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1294   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1295   if (ntids==1) {
1296     lcolp = colp;
1297   } else {
1298     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1299     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1300     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1301   }
1302   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1303   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1304   if (ntids>1) {
1305     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1306     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1307   }
1308   /* now we just get the submatrix */
1309   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1310   /* clean up */
1311   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1312   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
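
/*
   A minimal usage sketch for the routine above (illustrative only: 'A' is an assembled
   MATMPIAIJ matrix, the index arrays are placeholders, and error checking is omitted):

      IS  rowp,colp;
      Mat Aperm;
      ISCreateGeneral(comm,nlocal,localrowperm,&rowp);
      ISCreateGeneral(comm,nlocal,localcolperm,&colp);
      MatPermute(A,rowp,colp,&Aperm);

   The pieces supplied by the processes must together form permutations of the global
   row and column index sets.
*/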
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1318 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1319 {
1320   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1321   Mat            A = mat->A,B = mat->B;
1322   PetscErrorCode ierr;
1323   PetscReal      isend[5],irecv[5];
1324 
1325   PetscFunctionBegin;
1326   info->block_size     = 1.0;
1327   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1328   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1329   isend[3] = info->memory;  isend[4] = info->mallocs;
1330   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1331   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1332   isend[3] += info->memory;  isend[4] += info->mallocs;
1333   if (flag == MAT_LOCAL) {
1334     info->nz_used      = isend[0];
1335     info->nz_allocated = isend[1];
1336     info->nz_unneeded  = isend[2];
1337     info->memory       = isend[3];
1338     info->mallocs      = isend[4];
1339   } else if (flag == MAT_GLOBAL_MAX) {
1340     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1341     info->nz_used      = irecv[0];
1342     info->nz_allocated = irecv[1];
1343     info->nz_unneeded  = irecv[2];
1344     info->memory       = irecv[3];
1345     info->mallocs      = irecv[4];
1346   } else if (flag == MAT_GLOBAL_SUM) {
1347     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1348     info->nz_used      = irecv[0];
1349     info->nz_allocated = irecv[1];
1350     info->nz_unneeded  = irecv[2];
1351     info->memory       = irecv[3];
1352     info->mallocs      = irecv[4];
1353   }
1354   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1355   info->fill_ratio_needed = 0;
1356   info->factor_mallocs    = 0;
1357   info->rows_global       = (double)matin->rmap.N;
1358   info->columns_global    = (double)matin->cmap.N;
1359   info->rows_local        = (double)matin->rmap.n;
1360   info->columns_local     = (double)matin->cmap.N;
1361 
1362   PetscFunctionReturn(0);
1363 }
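
/*
   Typical use of the statistics gathered above (a sketch; 'comm' and 'A' are
   placeholders and error checking is omitted):

      MatInfo info;
      MatGetInfo(A,MAT_GLOBAL_SUM,&info);
      PetscPrintf(comm,"nz used %g, allocated %g, mallocs %g\n",
                  info.nz_used,info.nz_allocated,info.mallocs);

   MAT_LOCAL reports the per-process sums over the A (diagonal) and B (off-diagonal)
   blocks, while MAT_GLOBAL_MAX and MAT_GLOBAL_SUM reduce those values over the
   matrix's communicator.
*/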
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetOption_MPIAIJ"
1367 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1368 {
1369   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   switch (op) {
1374   case MAT_NEW_NONZERO_LOCATIONS:
1375   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1376   case MAT_KEEP_ZEROED_ROWS:
1377   case MAT_NEW_NONZERO_LOCATION_ERR:
1378   case MAT_USE_INODES:
1379   case MAT_IGNORE_ZERO_ENTRIES:
1380     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1381     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1382     break;
1383   case MAT_ROW_ORIENTED:
1384     a->roworiented = flg;
1385     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1386     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1387     break;
1388   case MAT_NEW_DIAGONALS:
1389     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1390     break;
1391   case MAT_IGNORE_OFF_PROC_ENTRIES:
1392     a->donotstash = PETSC_TRUE;
1393     break;
1394   case MAT_SYMMETRIC:
1395     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1396     break;
1397   case MAT_STRUCTURALLY_SYMMETRIC:
1398   case MAT_HERMITIAN:
1399   case MAT_SYMMETRY_ETERNAL:
1400     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1401     break;
1402   default:
1403     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatGetRow_MPIAIJ"
1410 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1411 {
1412   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1413   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1414   PetscErrorCode ierr;
1415   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1416   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1417   PetscInt       *cmap,*idx_p;
1418 
1419   PetscFunctionBegin;
1420   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1421   mat->getrowactive = PETSC_TRUE;
1422 
1423   if (!mat->rowvalues && (idx || v)) {
1424     /*
1425         allocate enough space to hold information from the longest row.
1426     */
1427     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1428     PetscInt     max = 1,tmp;
1429     for (i=0; i<matin->rmap.n; i++) {
1430       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1431       if (max < tmp) { max = tmp; }
1432     }
1433     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1434     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1435   }
1436 
1437   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1438   lrow = row - rstart;
1439 
1440   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1441   if (!v)   {pvA = 0; pvB = 0;}
1442   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1443   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1444   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1445   nztot = nzA + nzB;
1446 
1447   cmap  = mat->garray;
1448   if (v  || idx) {
1449     if (nztot) {
1450       /* Sort by increasing column numbers, assuming A and B already sorted */
1451       PetscInt imark = -1;
1452       if (v) {
1453         *v = v_p = mat->rowvalues;
1454         for (i=0; i<nzB; i++) {
1455           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1456           else break;
1457         }
1458         imark = i;
1459         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1460         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1461       }
1462       if (idx) {
1463         *idx = idx_p = mat->rowindices;
1464         if (imark > -1) {
1465           for (i=0; i<imark; i++) {
1466             idx_p[i] = cmap[cworkB[i]];
1467           }
1468         } else {
1469           for (i=0; i<nzB; i++) {
1470             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1471             else break;
1472           }
1473           imark = i;
1474         }
1475         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1476         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1477       }
1478     } else {
1479       if (idx) *idx = 0;
1480       if (v)   *v   = 0;
1481     }
1482   }
1483   *nz = nztot;
1484   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1485   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 #undef __FUNCT__
1490 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1491 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1492 {
1493   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1494 
1495   PetscFunctionBegin;
1496   if (!aij->getrowactive) {
1497     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1498   }
1499   aij->getrowactive = PETSC_FALSE;
1500   PetscFunctionReturn(0);
1501 }
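
/*
   Sketch of the MatGetRow()/MatRestoreRow() pairing expected by the two routines above
   (illustrative only; only locally owned rows may be requested):

      PetscInt    row,rstart,rend,ncols,*cols;
      PetscScalar *vals;
      MatGetOwnershipRange(A,&rstart,&rend);
      for (row=rstart; row<rend; row++) {
        MatGetRow(A,row,&ncols,&cols,&vals);
        ... use ncols, cols (global column indices) and vals ...
        MatRestoreRow(A,row,&ncols,&cols,&vals);
      }

   Each MatGetRow() must be matched by a MatRestoreRow() before the next row is
   requested, since the merged row is built in the single rowvalues/rowindices buffer.
*/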
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatNorm_MPIAIJ"
1505 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1506 {
1507   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1508   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1509   PetscErrorCode ierr;
1510   PetscInt       i,j,cstart = mat->cmap.rstart;
1511   PetscReal      sum = 0.0;
1512   MatScalar      *v;
1513 
1514   PetscFunctionBegin;
1515   if (aij->size == 1) {
1516     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1517   } else {
1518     if (type == NORM_FROBENIUS) {
1519       v = amat->a;
1520       for (i=0; i<amat->nz; i++) {
1521 #if defined(PETSC_USE_COMPLEX)
1522         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1523 #else
1524         sum += (*v)*(*v); v++;
1525 #endif
1526       }
1527       v = bmat->a;
1528       for (i=0; i<bmat->nz; i++) {
1529 #if defined(PETSC_USE_COMPLEX)
1530         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1531 #else
1532         sum += (*v)*(*v); v++;
1533 #endif
1534       }
1535       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1536       *norm = sqrt(*norm);
1537     } else if (type == NORM_1) { /* max column norm */
1538       PetscReal *tmp,*tmp2;
1539       PetscInt  *jj,*garray = aij->garray;
1540       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1541       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1542       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1543       *norm = 0.0;
1544       v = amat->a; jj = amat->j;
1545       for (j=0; j<amat->nz; j++) {
1546         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1547       }
1548       v = bmat->a; jj = bmat->j;
1549       for (j=0; j<bmat->nz; j++) {
1550         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1551       }
1552       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1553       for (j=0; j<mat->cmap.N; j++) {
1554         if (tmp2[j] > *norm) *norm = tmp2[j];
1555       }
1556       ierr = PetscFree(tmp);CHKERRQ(ierr);
1557       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1558     } else if (type == NORM_INFINITY) { /* max row norm */
1559       PetscReal ntemp = 0.0;
1560       for (j=0; j<aij->A->rmap.n; j++) {
1561         v = amat->a + amat->i[j];
1562         sum = 0.0;
1563         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1564           sum += PetscAbsScalar(*v); v++;
1565         }
1566         v = bmat->a + bmat->i[j];
1567         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1568           sum += PetscAbsScalar(*v); v++;
1569         }
1570         if (sum > ntemp) ntemp = sum;
1571       }
1572       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1573     } else {
1574       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1575     }
1576   }
1577   PetscFunctionReturn(0);
1578 }
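
/*
   The three supported norms above can be requested as in this sketch (illustrative only):

      PetscReal nrm1,nrmf,nrminf;
      MatNorm(A,NORM_1,&nrm1);            largest column sum of absolute values
      MatNorm(A,NORM_FROBENIUS,&nrmf);    square root of the sum of squares of all entries
      MatNorm(A,NORM_INFINITY,&nrminf);   largest row sum of absolute values

   NORM_2 is not supported in parallel, as the error branch above indicates.
*/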
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatTranspose_MPIAIJ"
1582 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1583 {
1584   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1585   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1586   PetscErrorCode ierr;
1587   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1588   PetscInt       cstart=A->cmap.rstart,ncol;
1589   Mat            B;
1590   MatScalar      *array;
1591 
1592   PetscFunctionBegin;
1593   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1594 
1595   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1596   ai = Aloc->i; aj = Aloc->j;
1597   bi = Bloc->i; bj = Bloc->j;
1598   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1599     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1600     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1601     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1602     for (i=0; i<ai[ma]; i++){
1603       d_nnz[aj[i]] ++;
1604       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1605     }
1606 
1607     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1608     ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1609     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1610     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1611     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1612   } else {
1613     B = *matout;
1614   }
1615 
1616   /* copy over the A part */
1617   array = Aloc->a;
1618   row = A->rmap.rstart;
1619   for (i=0; i<ma; i++) {
1620     ncol = ai[i+1]-ai[i];
1621     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1622     row++; array += ncol; aj += ncol;
1623   }
1624   aj = Aloc->j;
1625   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1626 
1627   /* copy over the B part */
1628   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1629   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1630   array = Bloc->a;
1631   row = A->rmap.rstart;
1632   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1633   cols_tmp = cols;
1634   for (i=0; i<mb; i++) {
1635     ncol = bi[i+1]-bi[i];
1636     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1637     row++; array += ncol; cols_tmp += ncol;
1638   }
1639   ierr = PetscFree(cols);CHKERRQ(ierr);
1640 
1641   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1642   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1644     *matout = B;
1645   } else {
1646     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1647   }
1648   PetscFunctionReturn(0);
1649 }
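
/*
   Transpose usage sketch (illustrative only, assuming the calling sequence of this
   revision in which MatTranspose() takes a MatReuse argument):

      Mat At;
      MatTranspose(A,MAT_INITIAL_MATRIX,&At);    new matrix holding the transpose of A
      MatTranspose(A,MAT_REUSE_MATRIX,&A);       in-place; requires A square, see the check above

   The preallocation above passes d_nnz for both the diagonal and off-diagonal parts of
   the result, so the off-diagonal estimate is only approximate and extra mallocs may
   occur during MatSetValues().
*/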
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1653 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1654 {
1655   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1656   Mat            a = aij->A,b = aij->B;
1657   PetscErrorCode ierr;
1658   PetscInt       s1,s2,s3;
1659 
1660   PetscFunctionBegin;
1661   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1662   if (rr) {
1663     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1664     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1665     /* Overlap communication with computation. */
1666     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667   }
1668   if (ll) {
1669     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1670     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1671     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1672   }
1673   /* scale the diagonal block */
1674   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1675 
1676   if (rr) {
1677     /* Do a scatter end and then right scale the off-diagonal block */
1678     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1680   }
1681 
1682   PetscFunctionReturn(0);
1683 }
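
/*
   Usage sketch for the scaling above (illustrative only): ll and rr have the same
   parallel layout as the rows and columns of the matrix respectively, and

      MatDiagonalScale(A,ll,rr);

   replaces A by diag(ll)*A*diag(rr).  Passing PETSC_NULL for either vector skips that
   side, exactly as the routine above allows.
*/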
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1687 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1688 {
1689   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1690   PetscErrorCode ierr;
1691 
1692   PetscFunctionBegin;
1693   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1694   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1699 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1700 {
1701   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1702   PetscErrorCode ierr;
1703 
1704   PetscFunctionBegin;
1705   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatEqual_MPIAIJ"
1711 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1712 {
1713   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1714   Mat            a,b,c,d;
1715   PetscTruth     flg;
1716   PetscErrorCode ierr;
1717 
1718   PetscFunctionBegin;
1719   a = matA->A; b = matA->B;
1720   c = matB->A; d = matB->B;
1721 
1722   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1723   if (flg) {
1724     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1725   }
1726   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatCopy_MPIAIJ"
1732 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1733 {
1734   PetscErrorCode ierr;
1735   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1736   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1737 
1738   PetscFunctionBegin;
1739   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1740   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1741     /* Because of the column compression in the off-process part of the matrix a->B,
1742        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
1743        directly on the two parts. If needed, a copy more efficient than MatCopy_Basic()
1744        could be provided by first uncompressing the a->B matrices and then copying the
1745        submatrices */
1746     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1747   } else {
1748     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1749     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1750   }
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1756 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1757 {
1758   PetscErrorCode ierr;
1759 
1760   PetscFunctionBegin;
1761   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 #include "petscblaslapack.h"
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatAXPY_MPIAIJ"
1768 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1769 {
1770   PetscErrorCode ierr;
1771   PetscInt       i;
1772   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1773   PetscBLASInt   bnz,one=1;
1774   Mat_SeqAIJ     *x,*y;
1775 
1776   PetscFunctionBegin;
1777   if (str == SAME_NONZERO_PATTERN) {
1778     PetscScalar alpha = a;
1779     x = (Mat_SeqAIJ *)xx->A->data;
1780     y = (Mat_SeqAIJ *)yy->A->data;
1781     bnz = PetscBLASIntCast(x->nz);
1782     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1783     x = (Mat_SeqAIJ *)xx->B->data;
1784     y = (Mat_SeqAIJ *)yy->B->data;
1785     bnz = PetscBLASIntCast(x->nz);
1786     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1787   } else if (str == SUBSET_NONZERO_PATTERN) {
1788     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1789 
1790     x = (Mat_SeqAIJ *)xx->B->data;
1791     y = (Mat_SeqAIJ *)yy->B->data;
1792     if (y->xtoy && y->XtoY != xx->B) {
1793       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1794       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1795     }
1796     if (!y->xtoy) { /* get xtoy */
1797       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1798       y->XtoY = xx->B;
1799       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1800     }
1801     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1802   } else {
1803     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1804   }
1805   PetscFunctionReturn(0);
1806 }
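
/*
   Sketch of the three cases handled above (illustrative only); in each case the
   operation computed is Y = Y + alpha*X:

      MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);       direct BLAS axpy on the stored arrays
      MatAXPY(Y,alpha,X,SUBSET_NONZERO_PATTERN);     X's nonzeros mapped into Y via xtoy
      MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);  falls back to MatAXPY_Basic()
*/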
1807 
1808 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatConjugate_MPIAIJ"
1812 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1813 {
1814 #if defined(PETSC_USE_COMPLEX)
1815   PetscErrorCode ierr;
1816   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1817 
1818   PetscFunctionBegin;
1819   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1820   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1821 #else
1822   PetscFunctionBegin;
1823 #endif
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatRealPart_MPIAIJ"
1829 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1830 {
1831   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1832   PetscErrorCode ierr;
1833 
1834   PetscFunctionBegin;
1835   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1836   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1842 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1843 {
1844   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1845   PetscErrorCode ierr;
1846 
1847   PetscFunctionBegin;
1848   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1849   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 #ifdef PETSC_HAVE_PBGL
1854 
1855 #include <boost/parallel/mpi/bsp_process_group.hpp>
1856 #include <boost/graph/distributed/ilu_default_graph.hpp>
1857 #include <boost/graph/distributed/ilu_0_block.hpp>
1858 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1859 #include <boost/graph/distributed/petsc/interface.hpp>
1860 #include <boost/multi_array.hpp>
1861 #include <boost/parallel/distributed_property_map.hpp>
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1865 /*
1866   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1867 */
1868 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1869 {
1870   namespace petsc = boost::distributed::petsc;
1871 
1872   namespace graph_dist = boost::graph::distributed;
1873   using boost::graph::distributed::ilu_default::process_group_type;
1874   using boost::graph::ilu_permuted;
1875 
1876   PetscTruth      row_identity, col_identity;
1877   PetscContainer  c;
1878   PetscInt        m, n, M, N;
1879   PetscErrorCode  ierr;
1880 
1881   PetscFunctionBegin;
1882   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1883   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1884   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1885   if (!row_identity || !col_identity) {
1886     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1887   }
1888 
1889   process_group_type pg;
1890   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1891   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1892   lgraph_type&   level_graph = *lgraph_p;
1893   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1894 
1895   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1896   ilu_permuted(level_graph);
1897 
1898   /* put together the new matrix */
1899   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1900   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1901   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1902   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1903   ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1904   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1905   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1906   (*fact)->factor = FACTOR_LU;
1907 
1908   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1909   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1910   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1916 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1917 {
1918   PetscFunctionBegin;
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatSolve_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
1927 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1928 {
1929   namespace graph_dist = boost::graph::distributed;
1930 
1931   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1932   lgraph_type*   lgraph_p;
1933   PetscContainer c;
1934   PetscErrorCode ierr;
1935 
1936   PetscFunctionBegin;
1937   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1938   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1939   ierr = VecCopy(b, x); CHKERRQ(ierr);
1940 
1941   PetscScalar* array_x;
1942   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1943   PetscInt sx;
1944   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1945 
1946   PetscScalar* array_b;
1947   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1948   PetscInt sb;
1949   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1950 
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1955   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1956                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1957 
1958   typedef boost::iterator_property_map<array_ref_type::iterator,
1959                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1960   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1961                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1962 
1963   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1964 
1965   PetscFunctionReturn(0);
1966 }
1967 #endif
1968 
1969 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1970   PetscInt       nzlocal,nsends,nrecvs;
1971   PetscMPIInt    *send_rank;
1972   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1973   PetscScalar    *sbuf_a,**rbuf_a;
1974   PetscErrorCode (*MatDestroy)(Mat);
1975 } Mat_Redundant;
1976 
1977 #undef __FUNCT__
1978 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1979 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1980 {
1981   PetscErrorCode       ierr;
1982   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1983   PetscInt             i;
1984 
1985   PetscFunctionBegin;
1986   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1987   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1988   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1989   for (i=0; i<redund->nrecvs; i++){
1990     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1991     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1992   }
1993   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1994   ierr = PetscFree(redund);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatDestroy_MatRedundant"
2000 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2001 {
2002   PetscErrorCode  ierr;
2003   PetscContainer  container;
2004   Mat_Redundant   *redund=PETSC_NULL;
2005 
2006   PetscFunctionBegin;
2007   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2008   if (container) {
2009     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2010   } else {
2011     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2012   }
2013   A->ops->destroy = redund->MatDestroy;
2014   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2015   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2016   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2022 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2023 {
2024   PetscMPIInt    rank,size;
2025   MPI_Comm       comm=((PetscObject)mat)->comm;
2026   PetscErrorCode ierr;
2027   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2028   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2029   PetscInt       *rowrange=mat->rmap.range;
2030   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2031   Mat            A=aij->A,B=aij->B,C=*matredundant;
2032   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2033   PetscScalar    *sbuf_a;
2034   PetscInt       nzlocal=a->nz+b->nz;
2035   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2036   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
2037   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2038   MatScalar      *aworkA,*aworkB;
2039   PetscScalar    *vals;
2040   PetscMPIInt    tag1,tag2,tag3,imdex;
2041   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2042                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2043   MPI_Status     recv_status,*send_status;
2044   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2045   PetscInt       **rbuf_j=PETSC_NULL;
2046   PetscScalar    **rbuf_a=PETSC_NULL;
2047   Mat_Redundant  *redund=PETSC_NULL;
2048   PetscContainer container;
2049 
2050   PetscFunctionBegin;
2051   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2052   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2053 
2054   if (reuse == MAT_REUSE_MATRIX) {
2055     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2056     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2057     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2058     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2059     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2060     if (container) {
2061       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2062     } else {
2063       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2064     }
2065     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2066 
2067     nsends    = redund->nsends;
2068     nrecvs    = redund->nrecvs;
2069     send_rank = redund->send_rank; recv_rank = send_rank + size;
2070     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2071     sbuf_j    = redund->sbuf_j;
2072     sbuf_a    = redund->sbuf_a;
2073     rbuf_j    = redund->rbuf_j;
2074     rbuf_a    = redund->rbuf_a;
2075   }
2076 
2077   if (reuse == MAT_INITIAL_MATRIX){
2078     PetscMPIInt  subrank,subsize;
2079     PetscInt     nleftover,np_subcomm;
2080     /* get the destination processors' id send_rank, nsends and nrecvs */
2081     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2082     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2083     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2084     recv_rank = send_rank + size;
2085     np_subcomm = size/nsubcomm;
2086     nleftover  = size - nsubcomm*np_subcomm;
2087     nsends = 0; nrecvs = 0;
2088     for (i=0; i<size; i++){ /* i=rank*/
2089       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2090         send_rank[nsends] = i; nsends++;
2091         recv_rank[nrecvs++] = i;
2092       }
2093     }
2094     if (rank >= size - nleftover){/* this proc is a leftover processor */
2095       i = size-nleftover-1;
2096       j = 0;
2097       while (j < nsubcomm - nleftover){
2098         send_rank[nsends++] = i;
2099         i--; j++;
2100       }
2101     }
2102 
2103     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2104       for (i=0; i<nleftover; i++){
2105         recv_rank[nrecvs++] = size-nleftover+i;
2106       }
2107     }
2108 
2109     /* allocate sbuf_j, sbuf_a */
2110     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2111     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2112     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2113   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2114 
2115   /* copy mat's local entries into the buffers */
2116   if (reuse == MAT_INITIAL_MATRIX){
2117     rownz_max = 0;
2118     rptr = sbuf_j;
2119     cols = sbuf_j + rend-rstart + 1;
2120     vals = sbuf_a;
2121     rptr[0] = 0;
2122     for (i=0; i<rend-rstart; i++){
2123       row = i + rstart;
2124       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2125       ncols  = nzA + nzB;
2126       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2127       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2128       /* load the column indices for this row into cols */
2129       lwrite = 0;
2130       for (l=0; l<nzB; l++) {
2131         if ((ctmp = bmap[cworkB[l]]) < cstart){
2132           vals[lwrite]   = aworkB[l];
2133           cols[lwrite++] = ctmp;
2134         }
2135       }
2136       for (l=0; l<nzA; l++){
2137         vals[lwrite]   = aworkA[l];
2138         cols[lwrite++] = cstart + cworkA[l];
2139       }
2140       for (l=0; l<nzB; l++) {
2141         if ((ctmp = bmap[cworkB[l]]) >= cend){
2142           vals[lwrite]   = aworkB[l];
2143           cols[lwrite++] = ctmp;
2144         }
2145       }
2146       vals += ncols;
2147       cols += ncols;
2148       rptr[i+1] = rptr[i] + ncols;
2149       if (rownz_max < ncols) rownz_max = ncols;
2150     }
2151     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2152   } else { /* only copy matrix values into sbuf_a */
2153     rptr = sbuf_j;
2154     vals = sbuf_a;
2155     rptr[0] = 0;
2156     for (i=0; i<rend-rstart; i++){
2157       row = i + rstart;
2158       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2159       ncols  = nzA + nzB;
2160       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2161       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2162       lwrite = 0;
2163       for (l=0; l<nzB; l++) {
2164         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2165       }
2166       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2167       for (l=0; l<nzB; l++) {
2168         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2169       }
2170       vals += ncols;
2171       rptr[i+1] = rptr[i] + ncols;
2172     }
2173   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2174 
2175   /* send nzlocal to others, and recv other's nzlocal */
2176   /*--------------------------------------------------*/
2177   if (reuse == MAT_INITIAL_MATRIX){
2178     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2179     s_waits2 = s_waits3 + nsends;
2180     s_waits1 = s_waits2 + nsends;
2181     r_waits1 = s_waits1 + nsends;
2182     r_waits2 = r_waits1 + nrecvs;
2183     r_waits3 = r_waits2 + nrecvs;
2184   } else {
2185     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2186     r_waits3 = s_waits3 + nsends;
2187   }
2188 
2189   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2190   if (reuse == MAT_INITIAL_MATRIX){
2191     /* get new tags to keep the communication clean */
2192     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2193     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2194     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2195     rbuf_nz = sbuf_nz + nsends;
2196 
2197     /* post receives of other's nzlocal */
2198     for (i=0; i<nrecvs; i++){
2199       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2200     }
2201     /* send nzlocal to others */
2202     for (i=0; i<nsends; i++){
2203       sbuf_nz[i] = nzlocal;
2204       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2205     }
2206     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2207     count = nrecvs;
2208     while (count) {
2209       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2210       recv_rank[imdex] = recv_status.MPI_SOURCE;
2211       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2212       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2213 
2214       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2215       rbuf_nz[imdex] += i + 2;
2216       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2217       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2218       count--;
2219     }
2220     /* wait on sends of nzlocal */
2221     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2222     /* send mat->i,j to others, and recv from other's */
2223     /*------------------------------------------------*/
2224     for (i=0; i<nsends; i++){
2225       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2226       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2227     }
2228     /* wait on receives of mat->i,j */
2229     /*------------------------------*/
2230     count = nrecvs;
2231     while (count) {
2232       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2233       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2234       count--;
2235     }
2236     /* wait on sends of mat->i,j */
2237     /*---------------------------*/
2238     if (nsends) {
2239       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2240     }
2241   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2242 
2243   /* post receives, send and receive mat->a */
2244   /*----------------------------------------*/
2245   for (imdex=0; imdex<nrecvs; imdex++) {
2246     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2247   }
2248   for (i=0; i<nsends; i++){
2249     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2250   }
2251   count = nrecvs;
2252   while (count) {
2253     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2254     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2255     count--;
2256   }
2257   if (nsends) {
2258     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2259   }
2260 
2261   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2262 
2263   /* create redundant matrix */
2264   /*-------------------------*/
2265   if (reuse == MAT_INITIAL_MATRIX){
2266     /* compute rownz_max for preallocation */
2267     for (imdex=0; imdex<nrecvs; imdex++){
2268       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2269       rptr = rbuf_j[imdex];
2270       for (i=0; i<j; i++){
2271         ncols = rptr[i+1] - rptr[i];
2272         if (rownz_max < ncols) rownz_max = ncols;
2273       }
2274     }
2275 
2276     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2277     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2278     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2279     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2280     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2281   } else {
2282     C = *matredundant;
2283   }
2284 
2285   /* insert local matrix entries */
2286   rptr = sbuf_j;
2287   cols = sbuf_j + rend-rstart + 1;
2288   vals = sbuf_a;
2289   for (i=0; i<rend-rstart; i++){
2290     row   = i + rstart;
2291     ncols = rptr[i+1] - rptr[i];
2292     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2293     vals += ncols;
2294     cols += ncols;
2295   }
2296   /* insert received matrix entries */
2297   for (imdex=0; imdex<nrecvs; imdex++){
2298     rstart = rowrange[recv_rank[imdex]];
2299     rend   = rowrange[recv_rank[imdex]+1];
2300     rptr = rbuf_j[imdex];
2301     cols = rbuf_j[imdex] + rend-rstart + 1;
2302     vals = rbuf_a[imdex];
2303     for (i=0; i<rend-rstart; i++){
2304       row   = i + rstart;
2305       ncols = rptr[i+1] - rptr[i];
2306       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2307       vals += ncols;
2308       cols += ncols;
2309     }
2310   }
2311   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2312   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2313   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2314   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2315   if (reuse == MAT_INITIAL_MATRIX){
2316     PetscContainer container;
2317     *matredundant = C;
2318     /* create a supporting struct and attach it to C for reuse */
2319     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2320     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2321     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2322     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2323     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2324 
2325     redund->nzlocal = nzlocal;
2326     redund->nsends  = nsends;
2327     redund->nrecvs  = nrecvs;
2328     redund->send_rank = send_rank;
2329     redund->sbuf_nz = sbuf_nz;
2330     redund->sbuf_j  = sbuf_j;
2331     redund->sbuf_a  = sbuf_a;
2332     redund->rbuf_j  = rbuf_j;
2333     redund->rbuf_a  = rbuf_a;
2334 
2335     redund->MatDestroy = C->ops->destroy;
2336     C->ops->destroy    = MatDestroy_MatRedundant;
2337   }
2338   PetscFunctionReturn(0);
2339 }
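
/*
   Summary of the routine above: each process ships its local rows to the matching
   processes of the other subcommunicators in three phases (row sizes with tag1, row
   pointers and column indices with tag2, numerical values with tag3), so that every
   subcommunicator can assemble a complete copy of the matrix.  On MAT_INITIAL_MATRIX
   the buffers and ranks are stashed in a Mat_Redundant container attached to the new
   matrix, so a MAT_REUSE_MATRIX call only needs to resend the numerical values.
*/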
2340 
2341 #undef __FUNCT__
2342 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2343 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2344 {
2345   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2346   PetscInt       n      = A->rmap.n;
2347   PetscInt       cstart = A->cmap.rstart;
2348   PetscInt      *cmap   = mat->garray;
2349   PetscInt      *diagIdx, *offdiagIdx;
2350   Vec            diagV, offdiagV;
2351   PetscScalar   *a, *diagA, *offdiagA;
2352   PetscInt       r;
2353   PetscErrorCode ierr;
2354 
2355   PetscFunctionBegin;
2356   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2357   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2358   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2359   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2360   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2361   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2362   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2363   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2364   for(r = 0; r < n; ++r) {
2365     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2366       a[r]   = diagA[r];
2367       idx[r] = cstart + diagIdx[r];
2368     } else {
2369       a[r]   = offdiagA[r];
2370       idx[r] = cmap[offdiagIdx[r]];
2371     }
2372   }
2373   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2374   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2375   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2376   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2377   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2378   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2379   PetscFunctionReturn(0);
2380 }
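
/*
   Usage sketch (illustrative only; names are placeholders and error checking is omitted):

      Vec      v;
      PetscInt *idx;
      MatGetVecs(A,PETSC_NULL,&v);                    vector with the row layout of A
      PetscMalloc(nlocalrows*sizeof(PetscInt),&idx);  one slot per local row
      MatGetRowMin(A,v,idx);

   v[i] receives each local row's minimum and idx[i] the global column index where it
   was found.
*/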
2381 
2382 #undef __FUNCT__
2383 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2384 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2385 {
2386   PetscErrorCode ierr;
2387 
2388   PetscFunctionBegin;
2389   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 /* -------------------------------------------------------------------*/
2394 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2395        MatGetRow_MPIAIJ,
2396        MatRestoreRow_MPIAIJ,
2397        MatMult_MPIAIJ,
2398 /* 4*/ MatMultAdd_MPIAIJ,
2399        MatMultTranspose_MPIAIJ,
2400        MatMultTransposeAdd_MPIAIJ,
2401 #ifdef PETSC_HAVE_PBGL
2402        MatSolve_MPIAIJ,
2403 #else
2404        0,
2405 #endif
2406        0,
2407        0,
2408 /*10*/ 0,
2409        0,
2410        0,
2411        MatRelax_MPIAIJ,
2412        MatTranspose_MPIAIJ,
2413 /*15*/ MatGetInfo_MPIAIJ,
2414        MatEqual_MPIAIJ,
2415        MatGetDiagonal_MPIAIJ,
2416        MatDiagonalScale_MPIAIJ,
2417        MatNorm_MPIAIJ,
2418 /*20*/ MatAssemblyBegin_MPIAIJ,
2419        MatAssemblyEnd_MPIAIJ,
2420        0,
2421        MatSetOption_MPIAIJ,
2422        MatZeroEntries_MPIAIJ,
2423 /*25*/ MatZeroRows_MPIAIJ,
2424        0,
2425 #ifdef PETSC_HAVE_PBGL
2426        MatLUFactorNumeric_MPIAIJ,
2427 #else
2428        0,
2429 #endif
2430        0,
2431        0,
2432 /*30*/ MatSetUpPreallocation_MPIAIJ,
2433 #ifdef PETSC_HAVE_PBGL
2434        MatILUFactorSymbolic_MPIAIJ,
2435 #else
2436        0,
2437 #endif
2438        0,
2439        0,
2440        0,
2441 /*35*/ MatDuplicate_MPIAIJ,
2442        0,
2443        0,
2444        0,
2445        0,
2446 /*40*/ MatAXPY_MPIAIJ,
2447        MatGetSubMatrices_MPIAIJ,
2448        MatIncreaseOverlap_MPIAIJ,
2449        MatGetValues_MPIAIJ,
2450        MatCopy_MPIAIJ,
2451 /*45*/ 0,
2452        MatScale_MPIAIJ,
2453        0,
2454        0,
2455        0,
2456 /*50*/ MatSetBlockSize_MPIAIJ,
2457        0,
2458        0,
2459        0,
2460        0,
2461 /*55*/ MatFDColoringCreate_MPIAIJ,
2462        0,
2463        MatSetUnfactored_MPIAIJ,
2464        MatPermute_MPIAIJ,
2465        0,
2466 /*60*/ MatGetSubMatrix_MPIAIJ,
2467        MatDestroy_MPIAIJ,
2468        MatView_MPIAIJ,
2469        0,
2470        0,
2471 /*65*/ 0,
2472        0,
2473        0,
2474        0,
2475        0,
2476 /*70*/ 0,
2477        0,
2478        MatSetColoring_MPIAIJ,
2479 #if defined(PETSC_HAVE_ADIC)
2480        MatSetValuesAdic_MPIAIJ,
2481 #else
2482        0,
2483 #endif
2484        MatSetValuesAdifor_MPIAIJ,
2485 /*75*/ 0,
2486        0,
2487        0,
2488        0,
2489        0,
2490 /*80*/ 0,
2491        0,
2492        0,
2493        0,
2494 /*84*/ MatLoad_MPIAIJ,
2495        0,
2496        0,
2497        0,
2498        0,
2499        0,
2500 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2501        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2502        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2503        MatPtAP_Basic,
2504        MatPtAPSymbolic_MPIAIJ,
2505 /*95*/ MatPtAPNumeric_MPIAIJ,
2506        0,
2507        0,
2508        0,
2509        0,
2510 /*100*/0,
2511        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2512        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2513        MatConjugate_MPIAIJ,
2514        0,
2515 /*105*/MatSetValuesRow_MPIAIJ,
2516        MatRealPart_MPIAIJ,
2517        MatImaginaryPart_MPIAIJ,
2518        0,
2519        0,
2520 /*110*/0,
2521        MatGetRedundantMatrix_MPIAIJ,
2522        MatGetRowMin_MPIAIJ,
2523        0,
2524        0,
2525 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2526 
2527 /* ----------------------------------------------------------------------------------------*/
2528 
2529 EXTERN_C_BEGIN
2530 #undef __FUNCT__
2531 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2532 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2533 {
2534   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2535   PetscErrorCode ierr;
2536 
2537   PetscFunctionBegin;
2538   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2539   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2540   PetscFunctionReturn(0);
2541 }
2542 EXTERN_C_END
2543 
2544 EXTERN_C_BEGIN
2545 #undef __FUNCT__
2546 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2547 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2548 {
2549   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2550   PetscErrorCode ierr;
2551 
2552   PetscFunctionBegin;
2553   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2554   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2555   PetscFunctionReturn(0);
2556 }
2557 EXTERN_C_END
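
/*
   The store/retrieve pair above is typically used as in this sketch (illustrative only)
   to save the numerical values of a matrix whose nonzero pattern is frozen and to
   restore them later:

      MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
      MatStoreValues(A);
      ... modify the values of A ...
      MatRetrieveValues(A);
*/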
2558 
2559 #include "petscpc.h"
2560 EXTERN_C_BEGIN
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2563 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2564 {
2565   Mat_MPIAIJ     *b;
2566   PetscErrorCode ierr;
2567   PetscInt       i;
2568 
2569   PetscFunctionBegin;
2570   B->preallocated = PETSC_TRUE;
2571   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2572   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2573   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2574   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2575 
2576   B->rmap.bs = B->cmap.bs = 1;
2577   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2578   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2579   if (d_nnz) {
2580     for (i=0; i<B->rmap.n; i++) {
2581       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2582     }
2583   }
2584   if (o_nnz) {
2585     for (i=0; i<B->rmap.n; i++) {
2586       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2587     }
2588   }
2589   b = (Mat_MPIAIJ*)B->data;
2590 
2591   /* Explicitly create 2 MATSEQAIJ matrices. */
2592   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2593   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2594   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2595   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2596   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2597   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2598   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2599   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2600 
2601   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2602   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2603 
2604   PetscFunctionReturn(0);
2605 }
2606 EXTERN_C_END
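
/*
   Preallocation sketch for a tridiagonal-like operator (illustrative only): each local
   row has at most 3 entries in the diagonal block and at most 1 entry coupling to
   off-process columns, so

      MatCreate(comm,&A);
      MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,3,PETSC_NULL,1,PETSC_NULL);

   d_nz/o_nz give a uniform per-row estimate; the d_nnz/o_nnz arrays validated above
   allow exact per-row counts instead.
*/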
2607 
2608 #undef __FUNCT__
2609 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2610 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2611 {
2612   Mat            mat;
2613   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2614   PetscErrorCode ierr;
2615 
2616   PetscFunctionBegin;
2617   *newmat       = 0;
2618   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2619   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2620   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2621   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2622   a    = (Mat_MPIAIJ*)mat->data;
2623 
2624   mat->factor       = matin->factor;
2625   mat->rmap.bs      = matin->rmap.bs;
2626   mat->assembled    = PETSC_TRUE;
2627   mat->insertmode   = NOT_SET_VALUES;
2628   mat->preallocated = PETSC_TRUE;
2629 
2630   a->size           = oldmat->size;
2631   a->rank           = oldmat->rank;
2632   a->donotstash     = oldmat->donotstash;
2633   a->roworiented    = oldmat->roworiented;
2634   a->rowindices     = 0;
2635   a->rowvalues      = 0;
2636   a->getrowactive   = PETSC_FALSE;
2637 
2638   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2639   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2640 
2641   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2642   if (oldmat->colmap) {
2643 #if defined (PETSC_USE_CTABLE)
2644     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2645 #else
2646     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2647     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2648     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2649 #endif
2650   } else a->colmap = 0;
2651   if (oldmat->garray) {
2652     PetscInt len;
2653     len  = oldmat->B->cmap.n;
2654     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2655     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2656     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2657   } else a->garray = 0;
2658 
2659   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2660   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2661   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2662   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2663   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2664   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2665   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2666   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2667   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2668   *newmat = mat;
2669   PetscFunctionReturn(0);
2670 }
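
/*
   Duplication sketch (illustrative only):

      Mat C;
      MatDuplicate(A,MAT_COPY_VALUES,&C);          same layout, nonzero pattern and values
      MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&C);   same layout and pattern, values not copied

   As the code above shows, the duplicate also gets copies of the colmap, garray, lvec
   and the Mvctx scatter, so it is immediately usable for MatMult() without reassembly.
*/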
2671 
2672 #include "petscsys.h"
2673 
2674 #undef __FUNCT__
2675 #define __FUNCT__ "MatLoad_MPIAIJ"
2676 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2677 {
2678   Mat            A;
2679   PetscScalar    *vals,*svals;
2680   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2681   MPI_Status     status;
2682   PetscErrorCode ierr;
2683   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2684   PetscInt       i,nz,j,rstart,rend,mmax;
2685   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2686   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2687   PetscInt       cend,cstart,n,*rowners;
2688   int            fd;
2689 
2690   PetscFunctionBegin;
2691   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2692   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2693   if (!rank) {
2694     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2695     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2696     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2697   }
2698 
2699   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2700   M = header[1]; N = header[2];
2701   /* determine ownership of all rows */
2702   m    = M/size + ((M % size) > rank);
2703   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2704   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2705 
2706   /* First process needs enough room for process with most rows */
2707   if (!rank) {
2708     mmax       = rowners[1];
2709     for (i=2; i<size; i++) {
2710       mmax = PetscMax(mmax,rowners[i]);
2711     }
2712   } else mmax = m;
2713 
2714   rowners[0] = 0;
2715   for (i=2; i<=size; i++) {
2716     rowners[i] += rowners[i-1];
2717   }
2718   rstart = rowners[rank];
2719   rend   = rowners[rank+1];
2720 
2721   /* distribute row lengths to all processors */
2722   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2723   if (!rank) {
2724     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2725     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2726     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2727     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2728     for (j=0; j<m; j++) {
2729       procsnz[0] += ourlens[j];
2730     }
2731     for (i=1; i<size; i++) {
2732       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2733       /* calculate the number of nonzeros on each processor */
2734       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2735         procsnz[i] += rowlengths[j];
2736       }
2737       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2738     }
2739     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2740   } else {
2741     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2742   }
2743 
2744   if (!rank) {
2745     /* determine max buffer needed and allocate it */
2746     maxnz = 0;
2747     for (i=0; i<size; i++) {
2748       maxnz = PetscMax(maxnz,procsnz[i]);
2749     }
2750     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2751 
2752     /* read in my part of the matrix column indices  */
2753     nz   = procsnz[0];
2754     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2755     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2756 
2757     /* read in everyone else's column indices and ship them off */
2758     for (i=1; i<size; i++) {
2759       nz   = procsnz[i];
2760       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2761       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2762     }
2763     ierr = PetscFree(cols);CHKERRQ(ierr);
2764   } else {
2765     /* determine buffer space needed for message */
2766     nz = 0;
2767     for (i=0; i<m; i++) {
2768       nz += ourlens[i];
2769     }
2770     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2771 
2772     /* receive message of column indices*/
2773     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2774     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2775     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2776   }
2777 
2778   /* determine column ownership if matrix is not square */
2779   if (N != M) {
2780     n      = N/size + ((N % size) > rank);
2781     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2782     cstart = cend - n;
2783   } else {
2784     cstart = rstart;
2785     cend   = rend;
2786     n      = cend - cstart;
2787   }
2788 
2789   /* loop over local rows, determining number of off diagonal entries */
2790   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2791   jj = 0;
2792   for (i=0; i<m; i++) {
2793     for (j=0; j<ourlens[i]; j++) {
2794       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2795       jj++;
2796     }
2797   }
2798 
2799   /* create our matrix */
2800   for (i=0; i<m; i++) {
2801     ourlens[i] -= offlens[i];
2802   }
2803   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2804   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2805   ierr = MatSetType(A,type);CHKERRQ(ierr);
2806   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2807 
2808   for (i=0; i<m; i++) {
2809     ourlens[i] += offlens[i];
2810   }
2811 
2812   if (!rank) {
2813     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2814 
2815     /* read in my part of the matrix numerical values  */
2816     nz   = procsnz[0];
2817     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2818 
2819     /* insert into matrix */
2820     jj      = rstart;
2821     smycols = mycols;
2822     svals   = vals;
2823     for (i=0; i<m; i++) {
2824       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2825       smycols += ourlens[i];
2826       svals   += ourlens[i];
2827       jj++;
2828     }
2829 
2830     /* read in other processors and ship out */
2831     for (i=1; i<size; i++) {
2832       nz   = procsnz[i];
2833       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2834       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2835     }
2836     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2837   } else {
2838     /* receive numeric values */
2839     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2840 
2841     /* receive message of values*/
2842     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2843     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2844     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2845 
2846     /* insert into matrix */
2847     jj      = rstart;
2848     smycols = mycols;
2849     svals   = vals;
2850     for (i=0; i<m; i++) {
2851       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2852       smycols += ourlens[i];
2853       svals   += ourlens[i];
2854       jj++;
2855     }
2856   }
2857   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2858   ierr = PetscFree(vals);CHKERRQ(ierr);
2859   ierr = PetscFree(mycols);CHKERRQ(ierr);
2860   ierr = PetscFree(rowners);CHKERRQ(ierr);
2861 
2862   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2863   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2864   *newmat = A;
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2870 /*
2871     Not great since it makes two copies of the submatrix: first a SeqAIJ
2872   on each process, and then the final result by concatenating the local matrices.
2873   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2874 */
2875 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2876 {
2877   PetscErrorCode ierr;
2878   PetscMPIInt    rank,size;
2879   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2880   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2881   Mat            *local,M,Mreuse;
2882   MatScalar      *vwork,*aa;
2883   MPI_Comm       comm = ((PetscObject)mat)->comm;
2884   Mat_SeqAIJ     *aij;
2885 
2886 
2887   PetscFunctionBegin;
2888   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2889   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2890 
2891   if (call ==  MAT_REUSE_MATRIX) {
2892     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2893     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2894     local = &Mreuse;
2895     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2896   } else {
2897     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2898     Mreuse = *local;
2899     ierr   = PetscFree(local);CHKERRQ(ierr);
2900   }
2901 
2902   /*
2903       m - number of local rows
2904       n - number of columns (same on all processors)
2905       rstart - first row in new global matrix generated
2906   */
2907   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2908   if (call == MAT_INITIAL_MATRIX) {
2909     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2910     ii  = aij->i;
2911     jj  = aij->j;
2912 
2913     /*
2914         Determine the number of non-zeros in the diagonal and off-diagonal
2915         portions of the matrix in order to do correct preallocation
2916     */
2917 
2918     /* first get start and end of "diagonal" columns */
2919     if (csize == PETSC_DECIDE) {
2920       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2921       if (mglobal == n) { /* square matrix */
2922 	nlocal = m;
2923       } else {
2924         nlocal = n/size + ((n % size) > rank);
2925       }
2926     } else {
2927       nlocal = csize;
2928     }
2929     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2930     rstart = rend - nlocal;
2931     if (rank == size - 1 && rend != n) {
2932       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2933     }
2934 
2935     /* next, compute all the lengths */
2936     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2937     olens = dlens + m;
2938     for (i=0; i<m; i++) {
2939       jend = ii[i+1] - ii[i];
2940       olen = 0;
2941       dlen = 0;
2942       for (j=0; j<jend; j++) {
2943         if (*jj < rstart || *jj >= rend) olen++;
2944         else dlen++;
2945         jj++;
2946       }
2947       olens[i] = olen;
2948       dlens[i] = dlen;
2949     }
2950     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2951     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2952     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2953     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2954     ierr = PetscFree(dlens);CHKERRQ(ierr);
2955   } else {
2956     PetscInt ml,nl;
2957 
2958     M = *newmat;
2959     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2960     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2961     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2962     /*
2963          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2964        rather than the slower MatSetValues().
2965     */
2966     M->was_assembled = PETSC_TRUE;
2967     M->assembled     = PETSC_FALSE;
2968   }
2969   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2970   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2971   ii  = aij->i;
2972   jj  = aij->j;
2973   aa  = aij->a;
2974   for (i=0; i<m; i++) {
2975     row   = rstart + i;
2976     nz    = ii[i+1] - ii[i];
2977     cwork = jj;     jj += nz;
2978     vwork = aa;     aa += nz;
2979     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2980   }
2981 
2982   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2983   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2984   *newmat = M;
2985 
2986   /* save submatrix used in processor for next request */
2987   if (call ==  MAT_INITIAL_MATRIX) {
2988     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2989     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2990   }
2991 
2992   PetscFunctionReturn(0);
2993 }
2994 
2995 EXTERN_C_BEGIN
2996 #undef __FUNCT__
2997 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2998 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2999 {
3000   PetscInt       m,cstart, cend,j,nnz,i,d;
3001   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3002   const PetscInt *JJ;
3003   PetscScalar    *values;
3004   PetscErrorCode ierr;
3005 
3006   PetscFunctionBegin;
3007   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3008 
3009   B->rmap.bs = B->cmap.bs = 1;
3010   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
3011   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
3012   m      = B->rmap.n;
3013   cstart = B->cmap.rstart;
3014   cend   = B->cmap.rend;
3015   rstart = B->rmap.rstart;
3016 
3017   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3018   o_nnz = d_nnz + m;
3019 
3020 #if defined(PETSC_USE_DEBUG)
3021   for (i=0; i<m; i++) {
3022     nnz     = Ii[i+1]- Ii[i];
3023     JJ      = J + Ii[i];
3024     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3025     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3026     if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
3027     for (j=1; j<nnz; j++) {
3028       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at location %D in column indices",i,j);
3029     }
3030   }
3031 #endif
3032 
3033   for (i=0; i<m; i++) {
3034     nnz     = Ii[i+1]- Ii[i];
3035     JJ      = J + Ii[i];
3036     nnz_max = PetscMax(nnz_max,nnz);
3037     for (j=0; j<nnz; j++) {
3038       if (*JJ >= cstart) break;
3039       JJ++;
3040     }
3041     d = 0;
3042     for (; j<nnz; j++) {
3043       if (*JJ++ >= cend) break;
3044       d++;
3045     }
3046     d_nnz[i] = d;
3047     o_nnz[i] = nnz - d;
3048   }
3049   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3050   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3051 
3052   if (v) values = (PetscScalar*)v;
3053   else {
3054     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3055     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3056   }
3057 
3058   for (i=0; i<m; i++) {
3059     ii   = i + rstart;
3060     nnz  = Ii[i+1]- Ii[i];
3061     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3062   }
3063   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3064   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3065 
3066   if (!v) {
3067     ierr = PetscFree(values);CHKERRQ(ierr);
3068   }
3069   PetscFunctionReturn(0);
3070 }
3071 EXTERN_C_END
3072 
3073 #undef __FUNCT__
3074 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3075 /*@
3076    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3077    (the default parallel PETSc format).
3078 
3079    Collective on MPI_Comm
3080 
3081    Input Parameters:
3082 +  B - the matrix
3083 .  i - the indices into j for the start of each local row (starts with zero)
3084 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3085 -  v - optional values in the matrix
3086 
3087    Level: developer
3088 
3089    Notes:
3090        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3091      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3092      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3093 
3094        The i and j indices are 0-based, and the i entries are offsets into the local j array.
3095 
3096        The format used for the sparse matrix input is equivalent to a
3097     row-major ordering, i.e. for the following matrix, the input data expected is
3098     as shown:
3099 
3100         1 0 0
3101         2 0 3     P0
3102        -------
3103         4 5 6     P1
3104 
3105      Process0 [P0]: rows_owned=[0,1]
3106         i =  {0,1,3}  [size = nrow+1  = 2+1]
3107         j =  {0,0,2}  [size = nz = 3]
3108         v =  {1,2,3}  [size = nz = 3]
3109 
3110      Process1 [P1]: rows_owned=[2]
3111         i =  {0,3}    [size = nrow+1  = 1+1]
3112         j =  {0,1,2}  [size = nz = 3]
3113         v =  {4,5,6}  [size = nz = 3]
3114 
3115       The column indices for each row MUST be sorted.
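
      A minimal usage sketch for the example above (assuming B has already been created
    with MatCreate()/MatSetSizes() and given the type MATMPIAIJ; the array names i0, j0,
    and v0 are illustrative only). Process 0 would pass its local CSR arrays as
.vb
     PetscInt    i0[] = {0,1,3};         /* row offsets of the two local rows          */
     PetscInt    j0[] = {0,0,2};         /* column indices, sorted within each row     */
     PetscScalar v0[] = {1.0,2.0,3.0};   /* numerical values, one per column index     */

     ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr);
.ve
      while process 1 would make the same call with its own local arrays.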
3116 
3117 .keywords: matrix, aij, compressed row, sparse, parallel
3118 
3119 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3120           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3121 @*/
3122 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3123 {
3124   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3125 
3126   PetscFunctionBegin;
3127   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3128   if (f) {
3129     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3130   }
3131   PetscFunctionReturn(0);
3132 }
3133 
3134 #undef __FUNCT__
3135 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3136 /*@C
3137    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3138    (the default parallel PETSc format).  For good matrix assembly performance
3139    the user should preallocate the matrix storage by setting the parameters
3140    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3141    performance can be increased by more than a factor of 50.
3142 
3143    Collective on MPI_Comm
3144 
3145    Input Parameters:
3146 +  A - the matrix
3147 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3148            (same value is used for all local rows)
3149 .  d_nnz - array containing the number of nonzeros in the various rows of the
3150            DIAGONAL portion of the local submatrix (possibly different for each row)
3151            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3152            The size of this array is equal to the number of local rows, i.e 'm'.
3153            You must leave room for the diagonal entry even if it is zero.
3154 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3155            submatrix (same value is used for all local rows).
3156 -  o_nnz - array containing the number of nonzeros in the various rows of the
3157            OFF-DIAGONAL portion of the local submatrix (possibly different for
3158            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3159            structure. The size of this array is equal to the number
3160            of local rows, i.e 'm'.
3161 
3162    If the *_nnz parameter is given then the *_nz parameter is ignored
3163 
3164    The AIJ format (also called the Yale sparse matrix format or
3165    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3166    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3167 
3168    The parallel matrix is partitioned such that the first m0 rows belong to
3169    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3170    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3171 
3172    The DIAGONAL portion of the local submatrix of a processor can be defined
3173    as the submatrix which is obtained by extracting the part corresponding
3174    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3175    first row that belongs to the processor, and r2 is the last row belonging
3176    to this processor. This is a square m x m matrix. The remaining portion
3177    of the local submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3178 
3179    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3180 
3181    You can call MatGetInfo() to get information on how effective the preallocation was,
3182    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3183    You can also run with the option -info and look for messages with the string
3184    malloc in them to see if additional memory allocation was needed.
3185 
3186    Example usage:
3187 
3188    Consider the following 8x8 matrix with 34 non-zero values, that is
3189    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3190    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3191    as follows:
3192 
3193 .vb
3194             1  2  0  |  0  3  0  |  0  4
3195     Proc0   0  5  6  |  7  0  0  |  8  0
3196             9  0 10  | 11  0  0  | 12  0
3197     -------------------------------------
3198            13  0 14  | 15 16 17  |  0  0
3199     Proc1   0 18  0  | 19 20 21  |  0  0
3200             0  0  0  | 22 23  0  | 24  0
3201     -------------------------------------
3202     Proc2  25 26 27  |  0  0 28  | 29  0
3203            30  0  0  | 31 32 33  |  0 34
3204 .ve
3205 
3206    This can be represented as a collection of submatrices as:
3207 
3208 .vb
3209       A B C
3210       D E F
3211       G H I
3212 .ve
3213 
3214    Where the submatrices A,B,C are owned by proc0, D,E,F are
3215    owned by proc1, G,H,I are owned by proc2.
3216 
3217    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3218    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3219    The 'M','N' parameters are 8,8, and have the same values on all procs.
3220 
3221    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3222    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3223    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3224    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3225    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3226    matrix, and [DF] as another SeqAIJ matrix.
3227 
3228    When d_nz, o_nz parameters are specified, d_nz storage elements are
3229    allocated for every row of the local diagonal submatrix, and o_nz
3230    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3231    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3232    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3233    In this case, the values of d_nz,o_nz are:
3234 .vb
3235      proc0 : dnz = 2, o_nz = 2
3236      proc1 : dnz = 3, o_nz = 2
3237      proc2 : dnz = 1, o_nz = 4
3238 .ve
3239    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3240    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3241    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3242    34 values.
3243 
3244    When d_nnz, o_nnz parameters are specified, the storage is specified
3245    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3246    In the above case the values for d_nnz,o_nnz are:
3247 .vb
3248      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3249      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3250      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3251 .ve
3252    Here the space allocated is the sum of all the above values, i.e. 34, and
3253    hence the preallocation is perfect.
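
      As a sketch (assuming a matrix mat that has already been created with type MATMPIAIJ
   and 3 local rows on proc0; the names dnnz and onnz are illustrative), proc0 in the example
   above could preallocate with
.vb
     PetscInt dnnz[3] = {2,2,2};   /* per-row nonzeros in the DIAGONAL block [A]      */
     PetscInt onnz[3] = {2,2,2};   /* per-row nonzeros in the OFF-DIAGONAL block [BC] */

     ierr = MatMPIAIJSetPreallocation(mat,0,dnnz,0,onnz);CHKERRQ(ierr);
.ve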
3254 
3255    Level: intermediate
3256 
3257 .keywords: matrix, aij, compressed row, sparse, parallel
3258 
3259 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3260           MPIAIJ, MatGetInfo()
3261 @*/
3262 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3263 {
3264   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3265 
3266   PetscFunctionBegin;
3267   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3268   if (f) {
3269     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3270   }
3271   PetscFunctionReturn(0);
3272 }
3273 
3274 #undef __FUNCT__
3275 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3276 /*@
3277      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3278          CSR format the local rows.
3279 
3280    Collective on MPI_Comm
3281 
3282    Input Parameters:
3283 +  comm - MPI communicator
3284 .  m - number of local rows (Cannot be PETSC_DECIDE)
3285 .  n - This value should be the same as the local size used in creating the
3286        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3287        calculated if N is given). For square matrices n is almost always m.
3288 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3289 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3290 .   i - row indices
3291 .   j - column indices
3292 -   a - matrix values
3293 
3294    Output Parameter:
3295 .   mat - the matrix
3296 
3297    Level: intermediate
3298 
3299    Notes:
3300        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3301      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3302      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3303 
3304        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3305 
3306        The format used for the sparse matrix input is equivalent to a
3307     row-major ordering, i.e. for the following matrix, the input data expected is
3308     as shown:
3309 
3310         1 0 0
3311         2 0 3     P0
3312        -------
3313         4 5 6     P1
3314 
3315      Process0 [P0]: rows_owned=[0,1]
3316         i =  {0,1,3}  [size = nrow+1  = 2+1]
3317         j =  {0,0,2}  [size = nz = 3]
3318         v =  {1,2,3}  [size = nz = 3]
3319 
3320      Process1 [P1]: rows_owned=[2]
3321         i =  {0,3}    [size = nrow+1  = 1+1]
3322         j =  {0,1,2}  [size = nz = 3]
3323         v =  {4,5,6}  [size = nz = 3]
3324 
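      A minimal sketch for the two-process example above (the variable names are illustrative;
    each process passes its own local arrays in the same collective call). On process 0 this
    could read
.vb
     Mat         A;
     PetscInt    i0[] = {0,1,3};
     PetscInt    j0[] = {0,0,2};
     PetscScalar v0[] = {1.0,2.0,3.0};

     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i0,j0,v0,&A);CHKERRQ(ierr);
.ve
      with process 1 passing m = 1 and its own i, j, v arrays.
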
3325 .keywords: matrix, aij, compressed row, sparse, parallel
3326 
3327 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3328           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3329 @*/
3330 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3331 {
3332   PetscErrorCode ierr;
3333 
3334   PetscFunctionBegin;
3335   if (i[0]) {
3336     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3337   }
3338   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3339   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3340   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3341   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3342   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3343   PetscFunctionReturn(0);
3344 }
3345 
3346 #undef __FUNCT__
3347 #define __FUNCT__ "MatCreateMPIAIJ"
3348 /*@C
3349    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3350    (the default parallel PETSc format).  For good matrix assembly performance
3351    the user should preallocate the matrix storage by setting the parameters
3352    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3353    performance can be increased by more than a factor of 50.
3354 
3355    Collective on MPI_Comm
3356 
3357    Input Parameters:
3358 +  comm - MPI communicator
3359 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3360            This value should be the same as the local size used in creating the
3361            y vector for the matrix-vector product y = Ax.
3362 .  n - This value should be the same as the local size used in creating the
3363        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3364        calculated if N is given). For square matrices n is almost always m.
3365 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3366 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3367 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3368            (same value is used for all local rows)
3369 .  d_nnz - array containing the number of nonzeros in the various rows of the
3370            DIAGONAL portion of the local submatrix (possibly different for each row)
3371            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3372            The size of this array is equal to the number of local rows, i.e 'm'.
3373            You must leave room for the diagonal entry even if it is zero.
3374 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3375            submatrix (same value is used for all local rows).
3376 -  o_nnz - array containing the number of nonzeros in the various rows of the
3377            OFF-DIAGONAL portion of the local submatrix (possibly different for
3378            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3379            structure. The size of this array is equal to the number
3380            of local rows, i.e 'm'.
3381 
3382    Output Parameter:
3383 .  A - the matrix
3384 
3385    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3386    MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
3387    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3388    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3389 
3390    Notes:
3391    If the *_nnz parameter is given then the *_nz parameter is ignored
3392 
3393    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3394    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3395    storage requirements for this matrix.
3396 
3397    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3398    processor then it must be used on all processors that share the object for
3399    that argument.
3400 
3401    The user MUST specify either the local or global matrix dimensions
3402    (possibly both).
3403 
3404    The parallel matrix is partitioned across processors such that the
3405    first m0 rows belong to process 0, the next m1 rows belong to
3406    process 1, the next m2 rows belong to process 2, etc., where
3407    m0,m1,m2,... are given by the input parameter 'm', i.e. each processor stores
3408    values corresponding to an [m x N] submatrix.
3409 
3410    The columns are logically partitioned with the n0 columns belonging
3411    to the 0th partition, the next n1 columns belonging to the next
3412    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
3413 
3414    The DIAGONAL portion of the local submatrix on any given processor
3415    is the submatrix corresponding to the rows and columns m,n
3416    corresponding to the given processor, i.e. the diagonal matrix on
3417    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3418    etc. The remaining portion of the local submatrix [m x (N-n)]
3419    constitutes the OFF-DIAGONAL portion. The example below better
3420    illustrates this concept.
3421 
3422    For a square global matrix we define each processor's diagonal portion
3423    to be its local rows and the corresponding columns (a square submatrix);
3424    each processor's off-diagonal portion encompasses the remainder of the
3425    local matrix (a rectangular submatrix).
3426 
3427    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3428 
3429    When calling this routine with a single process communicator, a matrix of
3430    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3431    type of communicator, use the construction mechanism:
3432      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
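
   A sketch of that construction mechanism (error checking abbreviated; m,n,M,N and the
   preallocation arguments are whatever the application requires):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve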
3433 
3434    By default, this format uses inodes (identical nodes) when possible.
3435    We search for consecutive rows with the same nonzero structure, thereby
3436    reusing matrix information to achieve increased efficiency.
3437 
3438    Options Database Keys:
3439 +  -mat_no_inode  - Do not use inodes
3440 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3441 -  -mat_aij_oneindex - Internally use indexing starting at 1
3442         rather than 0.  Note that when calling MatSetValues(),
3443         the user still MUST index entries starting at 0!
3444 
3445 
3446    Example usage:
3447 
3448    Consider the following 8x8 matrix with 34 non-zero values, that is
3449    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3450    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3451    as follows:
3452 
3453 .vb
3454             1  2  0  |  0  3  0  |  0  4
3455     Proc0   0  5  6  |  7  0  0  |  8  0
3456             9  0 10  | 11  0  0  | 12  0
3457     -------------------------------------
3458            13  0 14  | 15 16 17  |  0  0
3459     Proc1   0 18  0  | 19 20 21  |  0  0
3460             0  0  0  | 22 23  0  | 24  0
3461     -------------------------------------
3462     Proc2  25 26 27  |  0  0 28  | 29  0
3463            30  0  0  | 31 32 33  |  0 34
3464 .ve
3465 
3466    This can be represented as a collection of submatrices as:
3467 
3468 .vb
3469       A B C
3470       D E F
3471       G H I
3472 .ve
3473 
3474    Where the submatrices A,B,C are owned by proc0, D,E,F are
3475    owned by proc1, G,H,I are owned by proc2.
3476 
3477    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3478    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3479    The 'M','N' parameters are 8,8, and have the same values on all procs.
3480 
3481    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3482    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3483    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3484    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3485    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3486    matrix, and [DF] as another SeqAIJ matrix.
3487 
3488    When d_nz, o_nz parameters are specified, d_nz storage elements are
3489    allocated for every row of the local diagonal submatrix, and o_nz
3490    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3491    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3492    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3493    In this case, the values of d_nz,o_nz are:
3494 .vb
3495      proc0 : dnz = 2, o_nz = 2
3496      proc1 : dnz = 3, o_nz = 2
3497      proc2 : dnz = 1, o_nz = 4
3498 .ve
3499    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3500    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3501    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3502    34 values.
3503 
3504    When d_nnz, o_nnz parameters are specified, the storage is specified
3505    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3506    In the above case the values for d_nnz,o_nnz are:
3507 .vb
3508      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3509      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3510      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3511 .ve
3512    Here the space allocated is the sum of all the above values, i.e. 34, and
3513    hence the preallocation is perfect.
3514 
3515    Level: intermediate
3516 
3517 .keywords: matrix, aij, compressed row, sparse, parallel
3518 
3519 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3520           MPIAIJ, MatCreateMPIAIJWithArrays()
3521 @*/
3522 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3523 {
3524   PetscErrorCode ierr;
3525   PetscMPIInt    size;
3526 
3527   PetscFunctionBegin;
3528   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3529   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3530   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3531   if (size > 1) {
3532     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3533     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3534   } else {
3535     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3536     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3537   }
3538   PetscFunctionReturn(0);
3539 }
3540 
3541 #undef __FUNCT__
3542 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3543 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3544 {
3545   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3546 
3547   PetscFunctionBegin;
3548   *Ad     = a->A;
3549   *Ao     = a->B;
3550   *colmap = a->garray;
3551   PetscFunctionReturn(0);
3552 }
3553 
3554 #undef __FUNCT__
3555 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3556 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3557 {
3558   PetscErrorCode ierr;
3559   PetscInt       i;
3560   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3561 
3562   PetscFunctionBegin;
3563   if (coloring->ctype == IS_COLORING_GLOBAL) {
3564     ISColoringValue *allcolors,*colors;
3565     ISColoring      ocoloring;
3566 
3567     /* set coloring for diagonal portion */
3568     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3569 
3570     /* set coloring for off-diagonal portion */
3571     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3572     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3573     for (i=0; i<a->B->cmap.n; i++) {
3574       colors[i] = allcolors[a->garray[i]];
3575     }
3576     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3577     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3578     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3579     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3580   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3581     ISColoringValue *colors;
3582     PetscInt        *larray;
3583     ISColoring      ocoloring;
3584 
3585     /* set coloring for diagonal portion */
3586     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3587     for (i=0; i<a->A->cmap.n; i++) {
3588       larray[i] = i + A->cmap.rstart;
3589     }
3590     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3591     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3592     for (i=0; i<a->A->cmap.n; i++) {
3593       colors[i] = coloring->colors[larray[i]];
3594     }
3595     ierr = PetscFree(larray);CHKERRQ(ierr);
3596     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3597     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3598     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3599 
3600     /* set coloring for off-diagonal portion */
3601     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3602     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3603     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3604     for (i=0; i<a->B->cmap.n; i++) {
3605       colors[i] = coloring->colors[larray[i]];
3606     }
3607     ierr = PetscFree(larray);CHKERRQ(ierr);
3608     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3609     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3610     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3611   } else {
3612     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3613   }
3614 
3615   PetscFunctionReturn(0);
3616 }
3617 
3618 #if defined(PETSC_HAVE_ADIC)
3619 #undef __FUNCT__
3620 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3621 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3622 {
3623   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3624   PetscErrorCode ierr;
3625 
3626   PetscFunctionBegin;
3627   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3628   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3629   PetscFunctionReturn(0);
3630 }
3631 #endif
3632 
3633 #undef __FUNCT__
3634 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3635 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3636 {
3637   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3638   PetscErrorCode ierr;
3639 
3640   PetscFunctionBegin;
3641   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3642   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3643   PetscFunctionReturn(0);
3644 }
3645 
3646 #undef __FUNCT__
3647 #define __FUNCT__ "MatMerge"
3648 /*@
3649       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3650                  matrices from each processor
3651 
3652     Collective on MPI_Comm
3653 
3654    Input Parameters:
3655 +    comm - the communicator the parallel matrix will live on
3656 .    inmat - the input sequential matrices
3657 .    n - number of local columns (or PETSC_DECIDE)
3658 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3659 
3660    Output Parameter:
3661 .    outmat - the parallel matrix generated
3662 
3663     Level: advanced
3664 
3665    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3666 
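   A minimal calling sketch (assuming each process holds a SeqAIJ matrix localmat with the
   same number of columns; note that MatMerge() destroys the input matrix, so localmat must
   not be used afterwards):
.vb
     Mat merged;
     ierr = MatMerge(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);CHKERRQ(ierr);
.ve
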
3667 @*/
3668 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3669 {
3670   PetscErrorCode ierr;
3671   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3672   PetscInt       *indx;
3673   PetscScalar    *values;
3674 
3675   PetscFunctionBegin;
3676   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3677   if (scall == MAT_INITIAL_MATRIX){
3678     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3679     if (n == PETSC_DECIDE){
3680       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3681     }
3682     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3683     rstart -= m;
3684 
3685     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3686     for (i=0;i<m;i++) {
3687       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3688       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3689       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3690     }
3691     /* This routine will ONLY return MPIAIJ type matrix */
3692     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3693     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3694     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3695     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3696     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3697 
3698   } else if (scall == MAT_REUSE_MATRIX){
3699     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3700   } else {
3701     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3702   }
3703 
3704   for (i=0;i<m;i++) {
3705     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3706     Ii    = i + rstart;
3707     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3708     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3709   }
3710   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3711   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3712   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3713 
3714   PetscFunctionReturn(0);
3715 }
3716 
3717 #undef __FUNCT__
3718 #define __FUNCT__ "MatFileSplit"
3719 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3720 {
3721   PetscErrorCode    ierr;
3722   PetscMPIInt       rank;
3723   PetscInt          m,N,i,rstart,nnz;
3724   size_t            len;
3725   const PetscInt    *indx;
3726   PetscViewer       out;
3727   char              *name;
3728   Mat               B;
3729   const PetscScalar *values;
3730 
3731   PetscFunctionBegin;
3732   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3733   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3734   /* Should this be the type of the diagonal block of A? */
3735   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3736   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3737   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3738   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3739   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3740   for (i=0;i<m;i++) {
3741     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3742     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3743     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3744   }
3745   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3746   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3747 
3748   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3749   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3750   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3751   sprintf(name,"%s.%d",outfile,rank);
3752   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3753   ierr = PetscFree(name);CHKERRQ(ierr);
3754   ierr = MatView(B,out);CHKERRQ(ierr);
3755   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3756   ierr = MatDestroy(B);CHKERRQ(ierr);
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3761 #undef __FUNCT__
3762 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3763 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3764 {
3765   PetscErrorCode       ierr;
3766   Mat_Merge_SeqsToMPI  *merge;
3767   PetscContainer       container;
3768 
3769   PetscFunctionBegin;
3770   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3771   if (container) {
3772     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3773     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3774     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3775     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3776     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3777     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3778     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3779     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3780     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3781     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3782     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3783     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3784 
3785     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3786     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3787     ierr = PetscFree(merge);CHKERRQ(ierr);
3788   }
3789 
3790   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3791   PetscFunctionReturn(0);
3792 }
3793 
3794 #include "src/mat/utils/freespace.h"
3795 #include "petscbt.h"
3796 
3797 #undef __FUNCT__
3798 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3799 /*@C
3800       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3801                  matrices from each processor
3802 
3803     Collective on MPI_Comm
3804 
3805    Input Parameters:
3806 +    comm - the communicator the parallel matrix will live on
3807 .    seqmat - the input sequential matrices
3808 .    m - number of local rows (or PETSC_DECIDE)
3809 .    n - number of local columns (or PETSC_DECIDE)
3810 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3811 
3812    Output Parameter:
3813 .    mpimat - the parallel matrix generated
3814 
3815     Level: advanced
3816 
3817    Notes:
3818      The dimensions of the sequential matrix in each processor MUST be the same.
3819      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3820      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
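
     A sketch of the intended calling sequence, based on the symbolic/numeric split implemented
     in this file (m and n are the desired local sizes or PETSC_DECIDE):
.vb
     Mat mpimat;
     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve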
3821 @*/
3822 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3823 {
3824   PetscErrorCode       ierr;
3825   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3826   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3827   PetscMPIInt          size,rank,taga,*len_s;
3828   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3829   PetscInt             proc,m;
3830   PetscInt             **buf_ri,**buf_rj;
3831   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3832   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3833   MPI_Request          *s_waits,*r_waits;
3834   MPI_Status           *status;
3835   MatScalar            *aa=a->a;
3836   MatScalar            **abuf_r,*ba_i;
3837   Mat_Merge_SeqsToMPI  *merge;
3838   PetscContainer       container;
3839 
3840   PetscFunctionBegin;
3841   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3842 
3843   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3844   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3845 
3846   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3847   if (container) {
3848     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3849   }
3850   bi     = merge->bi;
3851   bj     = merge->bj;
3852   buf_ri = merge->buf_ri;
3853   buf_rj = merge->buf_rj;
3854 
3855   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3856   owners = merge->rowmap.range;
3857   len_s  = merge->len_s;
3858 
3859   /* send and recv matrix values */
3860   /*-----------------------------*/
3861   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3862   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3863 
3864   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3865   for (proc=0,k=0; proc<size; proc++){
3866     if (!len_s[proc]) continue;
3867     i = owners[proc];
3868     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3869     k++;
3870   }
3871 
3872   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3873   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3874   ierr = PetscFree(status);CHKERRQ(ierr);
3875 
3876   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3877   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3878 
3879   /* insert mat values of mpimat */
3880   /*----------------------------*/
3881   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
3882   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3883   nextrow = buf_ri_k + merge->nrecv;
3884   nextai  = nextrow + merge->nrecv;
3885 
3886   for (k=0; k<merge->nrecv; k++){
3887     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3888     nrows = *(buf_ri_k[k]);
3889     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
3890     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
3891   }
3892 
3893   /* set values of ba */
3894   m = merge->rowmap.n;
3895   for (i=0; i<m; i++) {
3896     arow = owners[rank] + i;
3897     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3898     bnzi = bi[i+1] - bi[i];
3899     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3900 
3901     /* add local non-zero vals of this proc's seqmat into ba */
3902     anzi = ai[arow+1] - ai[arow];
3903     aj   = a->j + ai[arow];
3904     aa   = a->a + ai[arow];
3905     nextaj = 0;
3906     for (j=0; nextaj<anzi; j++){
3907       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3908         ba_i[j] += aa[nextaj++];
3909       }
3910     }
3911 
3912     /* add received vals into ba */
3913     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3914       /* i-th row */
3915       if (i == *nextrow[k]) {
3916         anzi = *(nextai[k]+1) - *nextai[k];
3917         aj   = buf_rj[k] + *(nextai[k]);
3918         aa   = abuf_r[k] + *(nextai[k]);
3919         nextaj = 0;
3920         for (j=0; nextaj<anzi; j++){
3921           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3922             ba_i[j] += aa[nextaj++];
3923           }
3924         }
3925         nextrow[k]++; nextai[k]++;
3926       }
3927     }
3928     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3929   }
3930   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3931   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3932 
3933   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3934   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3935   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3936   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3937   PetscFunctionReturn(0);
3938 }
3939 
3940 #undef __FUNCT__
3941 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3942 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3943 {
3944   PetscErrorCode       ierr;
3945   Mat                  B_mpi;
3946   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3947   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3948   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3949   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3950   PetscInt             len,proc,*dnz,*onz;
3951   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3952   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3953   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3954   MPI_Status           *status;
3955   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3956   PetscBT              lnkbt;
3957   Mat_Merge_SeqsToMPI  *merge;
3958   PetscContainer       container;
3959 
3960   PetscFunctionBegin;
3961   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3962 
3963   /* make sure it is a PETSc comm */
3964   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3965   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3966   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3967 
3968   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3969   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3970 
3971   /* determine row ownership */
3972   /*---------------------------------------------------------*/
3973   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3974   merge->rowmap.n = m;
3975   merge->rowmap.N = M;
3976   merge->rowmap.bs = 1;
3977   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3978   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3979   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3980 
3981   m      = merge->rowmap.n;
3982   M      = merge->rowmap.N;
3983   owners = merge->rowmap.range;
3984 
3985   /* determine the number of messages to send, their lengths */
3986   /*---------------------------------------------------------*/
3987   len_s  = merge->len_s;
3988 
3989   len = 0;  /* length of buf_si[] */
3990   merge->nsend = 0;
3991   for (proc=0; proc<size; proc++){
3992     len_si[proc] = 0;
3993     if (proc == rank){
3994       len_s[proc] = 0;
3995     } else {
3996       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3997       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3998     }
3999     if (len_s[proc]) {
4000       merge->nsend++;
4001       nrows = 0;
4002       for (i=owners[proc]; i<owners[proc+1]; i++){
4003         if (ai[i+1] > ai[i]) nrows++;
4004       }
4005       len_si[proc] = 2*(nrows+1);
4006       len += len_si[proc];
4007     }
4008   }
4009 
4010   /* determine the number and length of messages to receive for ij-structure */
4011   /*-------------------------------------------------------------------------*/
4012   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4013   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4014 
4015   /* post the Irecv of j-structure */
4016   /*-------------------------------*/
4017   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4018   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4019 
4020   /* post the Isend of j-structure */
4021   /*--------------------------------*/
4022   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4023   sj_waits = si_waits + merge->nsend;
4024 
4025   for (proc=0, k=0; proc<size; proc++){
4026     if (!len_s[proc]) continue;
4027     i = owners[proc];
4028     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4029     k++;
4030   }
4031 
4032   /* receives and sends of j-structure are complete */
4033   /*------------------------------------------------*/
4034   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4035   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4036 
4037   /* send and recv i-structure */
4038   /*---------------------------*/
4039   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4040   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4041 
4042   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4043   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4044   for (proc=0,k=0; proc<size; proc++){
4045     if (!len_s[proc]) continue;
4046     /* form outgoing message for i-structure:
4047          buf_si[0]:                 nrows to be sent
4048                [1:nrows]:           row index (local to the receiving process)
4049                [nrows+1:2*nrows+1]: i-structure index
4050     */
4051     /*-------------------------------------------*/
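    /* Worked example (hypothetical values): if this process holds nonzeros for two of
       [proc]'s rows, say [proc]-local rows 3 and 7 with 2 and 4 nonzeros respectively,
       the packed message is buf_si = {2, 3, 7, 0, 2, 6}: nrows, the local row indices,
       then the nrows+1 running i-structure offsets. */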
4052     nrows = len_si[proc]/2 - 1;
4053     buf_si_i    = buf_si + nrows+1;
4054     buf_si[0]   = nrows;
4055     buf_si_i[0] = 0;
4056     nrows = 0;
4057     for (i=owners[proc]; i<owners[proc+1]; i++){
4058       anzi = ai[i+1] - ai[i];
4059       if (anzi) {
4060         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4061         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4062         nrows++;
4063       }
4064     }
4065     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4066     k++;
4067     buf_si += len_si[proc];
4068   }
4069 
4070   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4071   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4072 
4073   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4074   for (i=0; i<merge->nrecv; i++){
4075     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4076   }
4077 
4078   ierr = PetscFree(len_si);CHKERRQ(ierr);
4079   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4080   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4081   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4082   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4083   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4084   ierr = PetscFree(status);CHKERRQ(ierr);
4085 
4086   /* compute a local seq matrix in each processor */
4087   /*----------------------------------------------*/
4088   /* allocate bi array and free space for accumulating nonzero column info */
4089   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4090   bi[0] = 0;
4091 
4092   /* create and initialize a linked list */
4093   nlnk = N+1;
4094   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4095 
4096   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4097   len  = ai[owners[rank+1]] - ai[owners[rank]];
4099   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4100   current_space = free_space;
4101 
4102   /* determine symbolic info for each local row */
4103   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4104   nextrow = buf_ri_k + merge->nrecv;
4105   nextai  = nextrow + merge->nrecv;
4106   for (k=0; k<merge->nrecv; k++){
4107     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4108     nrows = *buf_ri_k[k];
4109     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4110     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4111   }
4112 
4113   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4114   len = 0;
4115   for (i=0;i<m;i++) {
4116     bnzi   = 0;
4117     /* add local non-zero cols of this proc's seqmat into lnk */
4118     arow   = owners[rank] + i;
4119     anzi   = ai[arow+1] - ai[arow];
4120     aj     = a->j + ai[arow];
4121     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4122     bnzi += nlnk;
4123     /* add received col data into lnk */
4124     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4125       if (i == *nextrow[k]) { /* i-th row */
4126         anzi = *(nextai[k]+1) - *nextai[k];
4127         aj   = buf_rj[k] + *nextai[k];
4128         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4129         bnzi += nlnk;
4130         nextrow[k]++; nextai[k]++;
4131       }
4132     }
4133     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4134 
4135     /* if free space is not available, make more free space */
4136     if (current_space->local_remaining<bnzi) {
4137       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
4138       nspacedouble++;
4139     }
4140     /* copy data into free space, then initialize lnk */
4141     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4142     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4143 
4144     current_space->array           += bnzi;
4145     current_space->local_used      += bnzi;
4146     current_space->local_remaining -= bnzi;
4147 
4148     bi[i+1] = bi[i] + bnzi;
4149   }
4150 
4151   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4152 
4153   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4154   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4155   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4156 
4157   /* create symbolic parallel matrix B_mpi */
4158   /*---------------------------------------*/
4159   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4160   if (n==PETSC_DECIDE) {
4161     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4162   } else {
4163     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4164   }
4165   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4166   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4167   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4168 
4169   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4170   B_mpi->assembled     = PETSC_FALSE;
4171   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4172   merge->bi            = bi;
4173   merge->bj            = bj;
4174   merge->buf_ri        = buf_ri;
4175   merge->buf_rj        = buf_rj;
4176   merge->coi           = PETSC_NULL;
4177   merge->coj           = PETSC_NULL;
4178   merge->owners_co     = PETSC_NULL;
4179 
4180   /* attach the supporting struct to B_mpi for reuse */
4181   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4182   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4183   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4184   *mpimat = B_mpi;
4185 
4186   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4187   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4188   PetscFunctionReturn(0);
4189 }
4190 
4191 #undef __FUNCT__
4192 #define __FUNCT__ "MatMerge_SeqsToMPI"
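/*
    Merges the SeqAIJ matrices owned by the individual processes of comm into one parallel
    MPIAIJ matrix. For MAT_INITIAL_MATRIX the symbolic phase builds the parallel nonzero
    structure first; for MAT_REUSE_MATRIX only the numerical values of the existing *mpimat
    are recomputed.
*/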
4193 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4194 {
4195   PetscErrorCode   ierr;
4196 
4197   PetscFunctionBegin;
4198   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4199   if (scall == MAT_INITIAL_MATRIX){
4200     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4201   }
4202   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4203   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4204   PetscFunctionReturn(0);
4205 }
4206 
4207 #undef __FUNCT__
4208 #define __FUNCT__ "MatGetLocalMat"
4209 /*@
4210      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of a parallel MPIAIJ matrix
4211 
4212     Not Collective
4213 
4214    Input Parameters:
4215 +    A - the matrix
4216 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4217 
4218    Output Parameter:
4219 .    A_loc - the local sequential matrix generated
4220 
4221     Level: developer
4222 
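   Example usage (an illustrative sketch, assuming A is an assembled MPIAIJ matrix):
.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc as an ordinary SeqAIJ matrix ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve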
4223 @*/
4224 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4225 {
4226   PetscErrorCode  ierr;
4227   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4228   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4229   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4230   MatScalar       *aa=a->a,*ba=b->a,*cam;
4231   PetscScalar     *ca;
4232   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
4233   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4234 
4235   PetscFunctionBegin;
4236   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4237   if (scall == MAT_INITIAL_MATRIX){
4238     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4239     ci[0] = 0;
4240     for (i=0; i<am; i++){
4241       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4242     }
4243     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4244     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4245     k = 0;
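    /* For each local row the three loops below emit columns in increasing global order:
       off-diagonal columns with global index < cstart, then the diagonal block shifted
       by cstart, then the remaining off-diagonal columns. */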
4246     for (i=0; i<am; i++) {
4247       ncols_o = bi[i+1] - bi[i];
4248       ncols_d = ai[i+1] - ai[i];
4249       /* off-diagonal portion of A */
4250       for (jo=0; jo<ncols_o; jo++) {
4251         col = cmap[*bj];
4252         if (col >= cstart) break;
4253         cj[k]   = col; bj++;
4254         ca[k++] = *ba++;
4255       }
4256       /* diagonal portion of A */
4257       for (j=0; j<ncols_d; j++) {
4258         cj[k]   = cstart + *aj++;
4259         ca[k++] = *aa++;
4260       }
4261       /* off-diagonal portion of A */
4262       for (j=jo; j<ncols_o; j++) {
4263         cj[k]   = cmap[*bj++];
4264         ca[k++] = *ba++;
4265       }
4266     }
4267     /* put together the new matrix */
4268     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4269     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4270     /* Since these are PETSc arrays, change flags to free them as necessary. */
4271     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4272     mat->free_a  = PETSC_TRUE;
4273     mat->free_ij = PETSC_TRUE;
4274     mat->nonew   = 0;
4275   } else if (scall == MAT_REUSE_MATRIX){
4276     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4277     ci = mat->i; cj = mat->j; cam = mat->a;
4278     for (i=0; i<am; i++) {
4279       /* off-diagonal portion of A */
4280       ncols_o = bi[i+1] - bi[i];
4281       for (jo=0; jo<ncols_o; jo++) {
4282         col = cmap[*bj];
4283         if (col >= cstart) break;
4284         *cam++ = *ba++; bj++;
4285       }
4286       /* diagonal portion of A */
4287       ncols_d = ai[i+1] - ai[i];
4288       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4289       /* off-diagonal portion of A */
4290       for (j=jo; j<ncols_o; j++) {
4291         *cam++ = *ba++; bj++;
4292       }
4293     }
4294   } else {
4295     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4296   }
4297 
4298   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4299   PetscFunctionReturn(0);
4300 }
4301 
4302 #undef __FUNCT__
4303 #define __FUNCT__ "MatGetLocalMatCondensed"
4304 /*@C
4305      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and the NON-ZERO columns of a parallel MPIAIJ matrix
4306 
4307     Not Collective
4308 
4309    Input Parameters:
4310 +    A - the matrix
4311 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4312 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4313 
4314    Output Parameter:
4315 .    A_loc - the local sequential matrix generated
4316 
4317     Level: developer
4318 
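   Example usage (an illustrative sketch; passing PETSC_NULL for row and col selects the
   default local rows and nonzero columns described above):
.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve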
4319 @*/
4320 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4321 {
4322   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4323   PetscErrorCode    ierr;
4324   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4325   IS                isrowa,iscola;
4326   Mat               *aloc;
4327 
4328   PetscFunctionBegin;
4329   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4330   if (!row){
4331     start = A->rmap.rstart; end = A->rmap.rend;
4332     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4333   } else {
4334     isrowa = *row;
4335   }
4336   if (!col){
4337     start = A->cmap.rstart;
4338     cmap  = a->garray;
4339     nzA   = a->A->cmap.n;
4340     nzB   = a->B->cmap.n;
4341     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4342     ncols = 0;
4343     for (i=0; i<nzB; i++) {
4344       if (cmap[i] < start) idx[ncols++] = cmap[i];
4345       else break;
4346     }
4347     imark = i;
4348     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4349     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4350     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4351     ierr = PetscFree(idx);CHKERRQ(ierr);
4352   } else {
4353     iscola = *col;
4354   }
4355   if (scall != MAT_INITIAL_MATRIX){
4356     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4357     aloc[0] = *A_loc;
4358   }
4359   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4360   *A_loc = aloc[0];
4361   ierr = PetscFree(aloc);CHKERRQ(ierr);
4362   if (!row){
4363     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4364   }
4365   if (!col){
4366     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4367   }
4368   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4369   PetscFunctionReturn(0);
4370 }
4371 
4372 #undef __FUNCT__
4373 #define __FUNCT__ "MatGetBrowsOfAcols"
4374 /*@C
4375     MatGetBrowsOfAcols - Creates a SeqAIJ matrix containing the rows of B that correspond to the nonzero columns of the local part of A
4376 
4377     Collective on Mat
4378 
4379    Input Parameters:
4380 +    A,B - the matrices in mpiaij format
4381 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4382 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4383 
4384    Output Parameters:
4385 +    rowb, colb - index sets of rows and columns of B to extract (created if PETSC_NULL was passed in)
4386 .    brstart - row index of B_seq at which B's own local rows begin
4387 -    B_seq - the sequential matrix generated
4388 
4389     Level: developer
4390 
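   Example usage (an illustrative sketch; rowb, colb and B_seq are created on the first call
   and reused on subsequent calls):
.vb
     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... later, after the numerical values of B change ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve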
4391 @*/
4392 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4393 {
4394   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4395   PetscErrorCode    ierr;
4396   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4397   IS                isrowb,iscolb;
4398   Mat               *bseq;
4399 
4400   PetscFunctionBegin;
4401   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4402     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4403   }
4404   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4405 
4406   if (scall == MAT_INITIAL_MATRIX){
4407     start = A->cmap.rstart;
4408     cmap  = a->garray;
4409     nzA   = a->A->cmap.n;
4410     nzB   = a->B->cmap.n;
4411     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4412     ncols = 0;
4413     for (i=0; i<nzB; i++) {  /* row < local row index */
4414       if (cmap[i] < start) idx[ncols++] = cmap[i];
4415       else break;
4416     }
4417     imark = i;
4418     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4419     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4420     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4421     ierr = PetscFree(idx);CHKERRQ(ierr);
4422     *brstart = imark;
4423     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4424   } else {
4425     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4426     isrowb = *rowb; iscolb = *colb;
4427     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4428     bseq[0] = *B_seq;
4429   }
4430   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4431   *B_seq = bseq[0];
4432   ierr = PetscFree(bseq);CHKERRQ(ierr);
4433   if (!rowb){
4434     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4435   } else {
4436     *rowb = isrowb;
4437   }
4438   if (!colb){
4439     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4440   } else {
4441     *colb = iscolb;
4442   }
4443   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4444   PetscFunctionReturn(0);
4445 }
4446 
4447 #undef __FUNCT__
4448 #define __FUNCT__ "MatGetBrowsOfAoCols"
4449 /*@C
4450     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix containing the rows of B that correspond to the nonzero columns
4451     of the OFF-DIAGONAL portion of the local part of A
4452 
4453     Collective on Mat
4454 
4455    Input Parameters:
4456 +    A,B - the matrices in mpiaij format
4457 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4458 .    startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4459 -    bufa_ptr - buffer for sending matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4460 
4461    Output Parameter:
4462 .    B_oth - the sequential matrix generated
4463 
4464     Level: developer
4465 
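   Example usage (an illustrative sketch; startsj and bufa are created on the first call and
   must be passed back unchanged for MAT_REUSE_MATRIX):
.vb
     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa    = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ... later, after the numerical values of B change ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
.ve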
4466 @*/
4467 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4468 {
4469   VecScatter_MPI_General *gen_to,*gen_from;
4470   PetscErrorCode         ierr;
4471   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4472   Mat_SeqAIJ             *b_oth;
4473   VecScatter             ctx=a->Mvctx;
4474   MPI_Comm               comm=((PetscObject)ctx)->comm;
4475   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4476   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4477   PetscScalar            *rvalues,*svalues;
4478   MatScalar              *b_otha,*bufa,*bufA;
4479   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4480   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4481   MPI_Status             *sstatus,rstatus;
4482   PetscMPIInt            jj;
4483   PetscInt               *cols,sbs,rbs;
4484   PetscScalar            *vals;
4485 
4486   PetscFunctionBegin;
4487   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4488     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4489   }
4490   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4491   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4492 
4493   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4494   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4495   rvalues  = gen_from->values; /* holds the length of receiving row */
4496   svalues  = gen_to->values;   /* holds the length of sending row */
4497   nrecvs   = gen_from->n;
4498   nsends   = gen_to->n;
4499 
4500   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4501   srow     = gen_to->indices;   /* local row index to be sent */
4502   sstarts  = gen_to->starts;
4503   sprocs   = gen_to->procs;
4504   sstatus  = gen_to->sstatus;
4505   sbs      = gen_to->bs;
4506   rstarts  = gen_from->starts;
4507   rprocs   = gen_from->procs;
4508   rbs      = gen_from->bs;
4509 
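  /* The exchange below reuses the communication pattern of a->Mvctx and proceeds in three
     passes: (1) row lengths (i-structure), (2) global column indices (j-array), and
     (3) numerical values (a-array). For MAT_REUSE_MATRIX the first two passes are skipped
     and only the values are exchanged again. */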
4510   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4511   if (scall == MAT_INITIAL_MATRIX){
4512     /* i-array */
4513     /*---------*/
4514     /*  post receives */
4515     for (i=0; i<nrecvs; i++){
4516       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4517       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4518       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4519     }
4520 
4521     /* pack the outgoing message */
4522     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4523     rstartsj = sstartsj + nsends +1;
4524     sstartsj[0] = 0;  rstartsj[0] = 0;
4525     len = 0; /* total length of j or a array to be sent */
4526     k = 0;
4527     for (i=0; i<nsends; i++){
4528       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4529       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4530       for (j=0; j<nrows; j++) {
4531         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4532         for (l=0; l<sbs; l++){
4533           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4534           rowlen[j*sbs+l] = ncols;
4535           len += ncols;
4536           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4537         }
4538         k++;
4539       }
4540       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4541       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4542     }
4543     /* recvs and sends of i-array are completed */
4544     i = nrecvs;
4545     while (i--) {
4546       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4547     }
4548     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4549 
4550     /* allocate buffers for sending j and a arrays */
4551     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4552     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4553 
4554     /* create i-array of B_oth */
4555     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4556     b_othi[0] = 0;
4557     len = 0; /* total length of j or a array to be received */
4558     k = 0;
4559     for (i=0; i<nrecvs; i++){
4560       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4561       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4562       for (j=0; j<nrows; j++) {
4563         b_othi[k+1] = b_othi[k] + rowlen[j];
4564         len += rowlen[j]; k++;
4565       }
4566       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4567     }
4568 
4569     /* allocate space for the j and a arrays of B_oth */
4570     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4571     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4572 
4573     /* j-array */
4574     /*---------*/
4575     /*  post receives of j-array */
4576     for (i=0; i<nrecvs; i++){
4577       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4578       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4579     }
4580 
4581     /* pack the outgoing message j-array */
4582     k = 0;
4583     for (i=0; i<nsends; i++){
4584       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4585       bufJ = bufj+sstartsj[i];
4586       for (j=0; j<nrows; j++) {
4587         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4588         for (ll=0; ll<sbs; ll++){
4589           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4590           for (l=0; l<ncols; l++){
4591             *bufJ++ = cols[l];
4592           }
4593           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4594         }
4595       }
4596       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4597     }
4598 
4599     /* recvs and sends of j-array are completed */
4600     i = nrecvs;
4601     while (i--) {
4602       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4603     }
4604     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4605   } else if (scall == MAT_REUSE_MATRIX){
4606     sstartsj = *startsj;
4607     rstartsj = sstartsj + nsends +1;
4608     bufa     = *bufa_ptr;
4609     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4610     b_otha   = b_oth->a;
4611   } else {
4612     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4613   }
4614 
4615   /* a-array */
4616   /*---------*/
4617   /*  post receives of a-array */
4618   for (i=0; i<nrecvs; i++){
4619     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4620     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4621   }
4622 
4623   /* pack the outgoing message a-array */
4624   k = 0;
4625   for (i=0; i<nsends; i++){
4626     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4627     bufA = bufa+sstartsj[i];
4628     for (j=0; j<nrows; j++) {
4629       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4630       for (ll=0; ll<sbs; ll++){
4631         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4632         for (l=0; l<ncols; l++){
4633           *bufA++ = vals[l];
4634         }
4635         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4636       }
4637     }
4638     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4639   }
4640   /* recvs and sends of a-array are completed */
4641   i = nrecvs;
4642   while (i--) {
4643     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4644   }
4645   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4646   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4647 
4648   if (scall == MAT_INITIAL_MATRIX){
4649     /* put together the new matrix */
4650     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4651 
4652     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4653     /* Since these are PETSc arrays, change flags to free them as necessary. */
4654     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4655     b_oth->free_a  = PETSC_TRUE;
4656     b_oth->free_ij = PETSC_TRUE;
4657     b_oth->nonew   = 0;
4658 
4659     ierr = PetscFree(bufj);CHKERRQ(ierr);
4660     if (!startsj || !bufa_ptr){
4661       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4662       ierr = PetscFree(bufa);CHKERRQ(ierr);
4663     } else {
4664       *startsj  = sstartsj;
4665       *bufa_ptr = bufa;
4666     }
4667   }
4668   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4669   PetscFunctionReturn(0);
4670 }
4671 
4672 #undef __FUNCT__
4673 #define __FUNCT__ "MatGetCommunicationStructs"
4674 /*@C
4675   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4676 
4677   Not Collective
4678 
4679   Input Parameters:
4680 . A - The matrix in mpiaij format
4681 
4682   Output Parameter:
4683 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4684 . colmap - A map from global column index to local index into lvec
4685 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4686 
4687   Level: developer
4688 
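  Example usage (an illustrative sketch; colmap has type PetscTable instead of PetscInt*
  when PETSc is configured with ctable support):
.vb
    Vec        lvec;
    PetscInt   *colmap;
    VecScatter Mvctx;
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve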
4689 @*/
4690 #if defined (PETSC_USE_CTABLE)
4691 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4692 #else
4693 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4694 #endif
4695 {
4696   Mat_MPIAIJ *a;
4697 
4698   PetscFunctionBegin;
4699   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4700   PetscValidPointer(lvec, 2);
4701   PetscValidPointer(colmap, 3);
4702   PetscValidPointer(multScatter, 4);
4703   a = (Mat_MPIAIJ *) A->data;
4704   if (lvec) *lvec = a->lvec;
4705   if (colmap) *colmap = a->colmap;
4706   if (multScatter) *multScatter = a->Mvctx;
4707   PetscFunctionReturn(0);
4708 }
4709 
4710 EXTERN_C_BEGIN
4711 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4712 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4713 EXTERN_C_END
4714 
4715 #include "src/mat/impls/dense/mpi/mpidense.h"
4716 
4717 #undef __FUNCT__
4718 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4719 /*
4720     Computes C = (B'*A')' since computing A*B directly is untenable
4721 
4722                n                       p                          p
4723         (              )       (              )         (                  )
4724       m (      A       )  *  n (       B      )   =   m (         C        )
4725         (              )       (              )         (                  )
4726 
4727 */
4728 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4729 {
4730   PetscErrorCode     ierr;
4731   Mat                At,Bt,Ct;
4732 
4733   PetscFunctionBegin;
4734   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4735   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4736   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4737   ierr = MatDestroy(At);CHKERRQ(ierr);
4738   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4739   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4740   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4741   PetscFunctionReturn(0);
4742 }
4743 
4744 #undef __FUNCT__
4745 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4746 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4747 {
4748   PetscErrorCode ierr;
4749   PetscInt       m=A->rmap.n,n=B->cmap.n;
4750   Mat            Cmat;
4751 
4752   PetscFunctionBegin;
4753   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
4754   ierr = MatCreate(A->hdr.comm,&Cmat);CHKERRQ(ierr);
4755   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4756   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4757   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4758   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4759   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4760   *C   = Cmat;
4761   PetscFunctionReturn(0);
4762 }
4763 
4764 /* ----------------------------------------------------------------*/
4765 #undef __FUNCT__
4766 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4767 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4768 {
4769   PetscErrorCode ierr;
4770 
4771   PetscFunctionBegin;
4772   if (scall == MAT_INITIAL_MATRIX){
4773     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4774   }
4775   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4776   PetscFunctionReturn(0);
4777 }
4778 
4779 /*MC
4780    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4781 
4782    Options Database Keys:
4783 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4784 
4785   Level: beginner
4786 
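  Example usage (an illustrative sketch; the sizes m,n,M,N and the preallocation estimates
  5 and 2 are placeholders):
.vb
    Mat A;
    ierr = MatCreate(comm,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve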
4787 .seealso: MatCreateMPIAIJ()
4788 M*/
4789 
4790 EXTERN_C_BEGIN
4791 #undef __FUNCT__
4792 #define __FUNCT__ "MatCreate_MPIAIJ"
4793 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4794 {
4795   Mat_MPIAIJ     *b;
4796   PetscErrorCode ierr;
4797   PetscMPIInt    size;
4798 
4799   PetscFunctionBegin;
4800   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4801 
4802   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4803   B->data         = (void*)b;
4804   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4805   B->factor       = 0;
4806   B->rmap.bs      = 1;
4807   B->assembled    = PETSC_FALSE;
4808   B->mapping      = 0;
4809 
4810   B->insertmode      = NOT_SET_VALUES;
4811   b->size            = size;
4812   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4813 
4814   /* build cache for off array entries formed */
4815   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4816   b->donotstash  = PETSC_FALSE;
4817   b->colmap      = 0;
4818   b->garray      = 0;
4819   b->roworiented = PETSC_TRUE;
4820 
4821   /* stuff used for matrix vector multiply */
4822   b->lvec      = PETSC_NULL;
4823   b->Mvctx     = PETSC_NULL;
4824 
4825   /* stuff for MatGetRow() */
4826   b->rowindices   = 0;
4827   b->rowvalues    = 0;
4828   b->getrowactive = PETSC_FALSE;
4829 
4830 
4831   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4832                                      "MatStoreValues_MPIAIJ",
4833                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4834   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4835                                      "MatRetrieveValues_MPIAIJ",
4836                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4837   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4838 				     "MatGetDiagonalBlock_MPIAIJ",
4839                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4840   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4841 				     "MatIsTranspose_MPIAIJ",
4842 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4843   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4844 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4845 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4846   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4847 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4848 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4849   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4850 				     "MatDiagonalScaleLocal_MPIAIJ",
4851 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4852   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4853                                      "MatConvert_MPIAIJ_MPICSRPERM",
4854                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4855   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4856                                      "MatConvert_MPIAIJ_MPICRL",
4857                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4858   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
4859                                      "MatMatMult_MPIDense_MPIAIJ",
4860                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
4861   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
4862                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
4863                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
4864   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
4865                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
4866                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
4867   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4868   PetscFunctionReturn(0);
4869 }
4870 EXTERN_C_END
4871 
4872 #undef __FUNCT__
4873 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4874 /*@
4875      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4876          and "off-diagonal" part of the matrix in CSR format.
4877 
4878    Collective on MPI_Comm
4879 
4880    Input Parameters:
4881 +  comm - MPI communicator
4882 .  m - number of local rows (Cannot be PETSC_DECIDE)
4883 .  n - This value should be the same as the local size used in creating the
4884        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4885        calculated if N is given). For square matrices n is almost always m.
4886 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4887 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4888 .   i - row indices for "diagonal" portion of matrix
4889 .   j - column indices
4890 .   a - matrix values
4891 .   oi - row indices for "off-diagonal" portion of matrix
4892 .   oj - column indices
4893 -   oa - matrix values
4894 
4895    Output Parameter:
4896 .   mat - the matrix
4897 
4898    Level: advanced
4899 
4900    Notes:
4901        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4902 
4903        The i and j indices are 0 based
4904 
4905        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4906 
4907 
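   Example usage (an illustrative sketch; i,j,a and oi,oj,oa are the caller-owned CSR arrays
   described above and must remain valid for the lifetime of the matrix since they are not copied):
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,M,N,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve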
4908 .keywords: matrix, aij, compressed row, sparse, parallel
4909 
4910 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4911           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4912 @*/
4913 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4914 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4915 {
4916   PetscErrorCode ierr;
4917   Mat_MPIAIJ     *maij;
4918 
4919   PetscFunctionBegin;
4920   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4921   if (i[0]) {
4922     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4923   }
4924   if (oi[0]) {
4925     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4926   }
4927   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4928   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4929   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4930   maij = (Mat_MPIAIJ*) (*mat)->data;
4931   maij->donotstash     = PETSC_TRUE;
4932   (*mat)->preallocated = PETSC_TRUE;
4933 
4934   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4935   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4936   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4937 
4938   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4939   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4940 
4941   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4942   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4943   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4944   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4945 
4946   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4947   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4948   PetscFunctionReturn(0);
4949 }
4950 
4951 /*
4952     Special version for direct calls from Fortran
4953 */
4954 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4955 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4956 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4957 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4958 #endif
4959 
4960 /* Change these macros so they can be used in a void function */
4961 #undef CHKERRQ
4962 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4963 #undef SETERRQ2
4964 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4965 #undef SETERRQ
4966 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4967 
4968 EXTERN_C_BEGIN
4969 #undef __FUNCT__
4970 #define __FUNCT__ "matsetvaluesmpiaij_"
4971 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4972 {
4973   Mat             mat = *mmat;
4974   PetscInt        m = *mm, n = *mn;
4975   InsertMode      addv = *maddv;
4976   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
4977   PetscScalar     value;
4978   PetscErrorCode  ierr;
4979 
4980   MatPreallocated(mat);
4981   if (mat->insertmode == NOT_SET_VALUES) {
4982     mat->insertmode = addv;
4983   }
4984 #if defined(PETSC_USE_DEBUG)
4985   else if (mat->insertmode != addv) {
4986     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4987   }
4988 #endif
4989   {
4990   PetscInt        i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4991   PetscInt        cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4992   PetscTruth      roworiented = aij->roworiented;
4993 
4994   /* Some Variables required in the macro */
4995   Mat             A = aij->A;
4996   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
4997   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4998   MatScalar       *aa = a->a;
4999   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5000   Mat             B = aij->B;
5001   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5002   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
5003   MatScalar       *ba = b->a;
5004 
5005   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5006   PetscInt        nonew = a->nonew;
5007   MatScalar       *ap1,*ap2;
5008 
5009   PetscFunctionBegin;
5010   for (i=0; i<m; i++) {
5011     if (im[i] < 0) continue;
5012 #if defined(PETSC_USE_DEBUG)
5013     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
5014 #endif
5015     if (im[i] >= rstart && im[i] < rend) {
5016       row      = im[i] - rstart;
5017       lastcol1 = -1;
5018       rp1      = aj + ai[row];
5019       ap1      = aa + ai[row];
5020       rmax1    = aimax[row];
5021       nrow1    = ailen[row];
5022       low1     = 0;
5023       high1    = nrow1;
5024       lastcol2 = -1;
5025       rp2      = bj + bi[row];
5026       ap2      = ba + bi[row];
5027       rmax2    = bimax[row];
5028       nrow2    = bilen[row];
5029       low2     = 0;
5030       high2    = nrow2;
5031 
5032       for (j=0; j<n; j++) {
5033         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5034         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5035         if (in[j] >= cstart && in[j] < cend){
5036           col = in[j] - cstart;
5037           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5038         } else if (in[j] < 0) continue;
5039 #if defined(PETSC_USE_DEBUG)
5040         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
5041 #endif
5042         else {
5043           if (mat->was_assembled) {
5044             if (!aij->colmap) {
5045               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5046             }
5047 #if defined (PETSC_USE_CTABLE)
5048             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5049 	    col--;
5050 #else
5051             col = aij->colmap[in[j]] - 1;
5052 #endif
5053             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5054               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5055               col =  in[j];
5056               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5057               B = aij->B;
5058               b = (Mat_SeqAIJ*)B->data;
5059               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5060               rp2      = bj + bi[row];
5061               ap2      = ba + bi[row];
5062               rmax2    = bimax[row];
5063               nrow2    = bilen[row];
5064               low2     = 0;
5065               high2    = nrow2;
5066               bm       = aij->B->rmap.n;
5067               ba = b->a;
5068             }
5069           } else col = in[j];
5070           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5071         }
5072       }
5073     } else {
5074       if (!aij->donotstash) {
5075         if (roworiented) {
5076           if (ignorezeroentries && v[i*n] == 0.0) continue;
5077           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5078         } else {
5079           if (ignorezeroentries && v[i] == 0.0) continue;
5080           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5081         }
5082       }
5083     }
5084   }}
5085   PetscFunctionReturnVoid();
5086 }
5087 EXTERN_C_END
5088 
5089