xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fc4dec0ab267e831b41a38fdd32730b7f8abdc58)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix (needed only on process 0) across a set of
10     processes. Code stolen from MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices.
13 */
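/*
    Example use (a sketch, not part of the original source; gseq is assumed to be
    a SeqAIJ matrix valid on process 0 only, and m the number of rows this
    process is to own):

      Mat dist;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ...
      later, after the numerical values of gseq change on process 0, reuse the layout:
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/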
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal entries in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal entries in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap.n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap.n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap.n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
180 a slightly higher hash-table cost; without it, it is not scalable (each process
181 holds an order-N integer array) but is fast to access.
182 */
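/*
    Lookup sketch (gcol and lcol are illustrative names, not part of the original
    source): translating a global column gcol into its local index lcol in the
    off-diagonal part, once the colmap exists:

      #if defined(PETSC_USE_CTABLE)
        ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
        lcol--;
      #else
        lcol = aij->colmap[gcol] - 1;
      #endif

    Entries are stored shifted by one so that a result of 0 (hence lcol < 0)
    means the global column does not occur in the off-diagonal part.
*/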
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap.n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
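/*
    The two macros below insert a single (row,col,value) pair into the diagonal
    (A) or off-diagonal (B) SeqAIJ block.  They binary-search the row for the
    column (the low/high/lastcol variables cache the previous search to speed
    up sorted input), add to or overwrite the entry in place if the column is
    already present, and otherwise shift the tail of the row up one slot to
    make room, reallocating through MatSeqXAIJReallocateAIJ() when the row is
    full.
*/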
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
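/*
    Assumed input layout (an editorial note, inferred from the copies below):
    v must hold the complete nonzero row in ascending global column order, i.e.
    the off-diagonal entries left of the diagonal block, then the diagonal-block
    entries, then the off-diagonal entries to the right; the three PetscMemcpy()
    calls copy those segments into B, A, and B respectively.
*/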
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
311   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap.n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
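/*
    Example use (a sketch; indices are global and assume a matrix with at least
    8 columns; values destined for rows this process does not own are stashed
    here and delivered to their owners during MatAssemblyBegin/End):

      PetscInt    rows[1] = {0},cols[2] = {0,7};
      PetscScalar vals[2] = {1.0,2.0};
      ierr = MatSetValues(mat,1,rows,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/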
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
415   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
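  /*
     The MPI_BOR reduction works because (assuming the enum values of this era
     of PETSc) INSERT_VALUES and ADD_VALUES occupy distinct bits (1 and 2); if
     some processes insert while others add, the OR of all the insertmodes is
     ADD_VALUES|INSERT_VALUES, which the test below catches.
  */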
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled, if so we must
517      also disassemble ourselves, so that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
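  /*
     was_assembled is false (0) on any process that has disassembled, so the
     MPI_PROD reduction yields false if at least one process disassembled;
     every still-assembled process then disassembles as well.
  */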
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
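/*
    Communication plan (a summary of the code below): each process (1) counts
    how many of its requested rows belong to each owning process, (2) learns via
    PetscMaxSum() how many messages to expect and their maximum length,
    (3) posts MPI_Irecv()s and ships the row indices with MPI_Isend(),
    (4) unpacks the received global rows into local numbering (lrows), and
    (5) zeros those local rows, optionally inserting the diagonal value.
*/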
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap.range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of the number of messages and the maximum message length */
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal, we use separate
673      code so that MatSetValues() is not called for each diagonal entry,
674      which would allocate new memory, triggering many mallocs and slowing things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
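/*
    Sketch of the computation: with x_loc the locally owned part of xx and
    x_ghost (= a->lvec) the gathered off-process values,

        yy = A*x_loc + B*x_ghost

    The scatter that fills lvec is begun before the local product and completed
    after it, overlapping communication with computation.
*/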
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap.n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
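/*
    Sketch of the computation: with x_loc the locally owned part of xx,

        yy = A^T * x_loc + (reverse scatter-add across processes of B^T * x_loc)

    The "merged" branch handles VecScatter implementations whose reverse scatter
    communicates inside VecScatterBegin(); the local product must then be formed
    first, and VecScatterEnd() is only a required no-op.
*/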
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts: note this assumes the values are not actually */
764     /* added into yy until the VecScatterEnd() on the next line */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* the values were actually received in the Begin(), but we still must call this no-op End() */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
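/*
    Layout written to the file (PETSc binary matrix format): a 4-entry header
    [MAT_FILE_COOKIE, M, N, total nonzeros], then all row lengths, then all
    column indices, then all numerical values.  Process 0 writes its own piece
    of each section and receives the remaining pieces in rank order.
*/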
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap.N;
932     header[2] = mat->cmap.N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap.n;
938     range = mat->rmap.range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap.n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap.n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
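  /*
     The three inner loops emit each row's columns in ascending global order
     (garray is sorted ascending): first the off-diagonal (B) entries whose
     global column precedes the diagonal block, then the diagonal (A) entries
     shifted by cstart, then the remaining off-diagonal entries.
  */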
968   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap.n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap.n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");
1114 
1115     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1116     if (!rank) {
1117       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1118     } else {
1119       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1120     }
1121     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1122     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1123     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1124     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1125 
1126     /* copy over the A part */
1127     Aloc = (Mat_SeqAIJ*)aij->A->data;
1128     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1129     row = mat->rmap.rstart;
1130     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
1131     for (i=0; i<m; i++) {
1132       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1133       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1134     }
1135     aj = Aloc->j;
1136     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
1137 
1138     /* copy over the B part */
1139     Aloc = (Mat_SeqAIJ*)aij->B->data;
1140     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1141     row  = mat->rmap.rstart;
1142     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1143     ct   = cols;
1144     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1148     }
1149     ierr = PetscFree(ct);CHKERRQ(ierr);
1150     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1151     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1152     /*
1153        Everyone has to call to draw the matrix since the graphics waits are
1154        synchronized across all processors that share the PetscDraw object
1155     */
1156     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1157     if (!rank) {
1158       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1159       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1160     }
1161     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1162     ierr = MatDestroy(A);CHKERRQ(ierr);
1163   }
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatView_MPIAIJ"
1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1170 {
1171   PetscErrorCode ierr;
1172   PetscTruth     iascii,isdraw,issocket,isbinary;
1173 
1174   PetscFunctionBegin;
1175   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1176   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1177   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1178   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1179   if (iascii || isdraw || isbinary || issocket) {
1180     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1181   } else {
1182     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatRelax_MPIAIJ"
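/*
    Sketch of each outer iteration (all SOR_LOCAL_* variants): gather the ghost
    values x_ghost into mat->lvec and form the updated right-hand side

        bb1 = bb - B*x_ghost

    (implemented below by scaling lvec by -1 and using MatMultAdd on B), then
    run the requested SOR sweep of the diagonal block A against bb1.  Truly
    coupled parallel SOR is not supported.
*/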
1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1190 {
1191   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1192   PetscErrorCode ierr;
1193   Vec            bb1;
1194 
1195   PetscFunctionBegin;
1196   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1197 
1198   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1199     if (flag & SOR_ZERO_INITIAL_GUESS) {
1200       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1201       its--;
1202     }
1203 
1204     while (its--) {
1205       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1207 
1208       /* update rhs: bb1 = bb - B*x */
1209       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1210       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1211 
1212       /* local sweep */
1213       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1214     }
1215   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1216     if (flag & SOR_ZERO_INITIAL_GUESS) {
1217       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1218       its--;
1219     }
1220     while (its--) {
1221       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1222       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223 
1224       /* update rhs: bb1 = bb - B*x */
1225       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1226       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1227 
1228       /* local sweep */
1229       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1230     }
1231   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1232     if (flag & SOR_ZERO_INITIAL_GUESS) {
1233       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1234       its--;
1235     }
1236     while (its--) {
1237       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239 
1240       /* update rhs: bb1 = bb - B*x */
1241       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1242       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1243 
1244       /* local sweep */
1245       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1246     }
1247   } else {
1248     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1249   }
1250 
1251   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatPermute_MPIAIJ"
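/*
    Overall plan (a summary): gather the row permutation onto every process,
    invert it, and keep this process's slice; invert the column permutation
    locally; the permuted matrix is then exactly the submatrix
    A(rowp^{-1}, colp^{-1}) extracted with MatGetSubMatrix().
*/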
1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1258 {
1259   MPI_Comm       comm,pcomm;
1260   PetscInt       first,local_size,nrows,*rows;
1261   int            ntids;
1262   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1267   /* make a collective version of 'rowp' */
1268   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1269   if (pcomm==comm) {
1270     crowp = rowp;
1271   } else {
1272     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1273     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1274     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1275     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1276   }
1277   /* collect the global row permutation and invert it */
1278   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1279   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1280   if (pcomm!=comm) {
1281     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1282   }
1283   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1284   /* get the local target indices */
1285   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1286   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1287   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1288   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1289   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1290   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1291   /* the column permutation is so much easier;
1292      make a local version of 'colp' and invert it */
1293   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1294   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1295   if (ntids==1) {
1296     lcolp = colp;
1297   } else {
1298     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1299     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1300     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1301   }
1302   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1303   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1304   if (ntids>1) {
1305     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1306     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1307   }
1308   /* now we just get the submatrix */
1309   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1310   /* clean up */
1311   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1312   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
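/*
   Usage sketch for MatPermute_MPIAIJ() above (illustrative; compiled out, and
   ExamplePermute is a hypothetical name).  Both index sets must be flagged as
   permutations, since the routine relies on ISSetPermutation()/ISInvertPermutation().
*/
#if 0
static PetscErrorCode ExamplePermute(Mat A,Mat *B)
{
  PetscErrorCode ierr;
  PetscInt       first,n;
  IS             rowp,colp;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&n,PETSC_NULL);CHKERRQ(ierr);
  /* identity permutations as placeholders; substitute any permutation of 0..N-1 */
  ierr = ISCreateStride(((PetscObject)A)->comm,n,first,1,&rowp);CHKERRQ(ierr);
  ierr = ISSetPermutation(rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(((PetscObject)A)->comm,n,first,1,&colp);CHKERRQ(ierr);
  ierr = ISSetPermutation(colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,B);CHKERRQ(ierr);
  ierr = ISDestroy(rowp);CHKERRQ(ierr);
  ierr = ISDestroy(colp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
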
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1318 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1319 {
1320   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1321   Mat            A = mat->A,B = mat->B;
1322   PetscErrorCode ierr;
1323   PetscReal      isend[5],irecv[5];
1324 
1325   PetscFunctionBegin;
1326   info->block_size     = 1.0;
1327   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1328   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1329   isend[3] = info->memory;  isend[4] = info->mallocs;
1330   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1331   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1332   isend[3] += info->memory;  isend[4] += info->mallocs;
1333   if (flag == MAT_LOCAL) {
1334     info->nz_used      = isend[0];
1335     info->nz_allocated = isend[1];
1336     info->nz_unneeded  = isend[2];
1337     info->memory       = isend[3];
1338     info->mallocs      = isend[4];
1339   } else if (flag == MAT_GLOBAL_MAX) {
1340     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1341     info->nz_used      = irecv[0];
1342     info->nz_allocated = irecv[1];
1343     info->nz_unneeded  = irecv[2];
1344     info->memory       = irecv[3];
1345     info->mallocs      = irecv[4];
1346   } else if (flag == MAT_GLOBAL_SUM) {
1347     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1348     info->nz_used      = irecv[0];
1349     info->nz_allocated = irecv[1];
1350     info->nz_unneeded  = irecv[2];
1351     info->memory       = irecv[3];
1352     info->mallocs      = irecv[4];
1353   }
1354   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1355   info->fill_ratio_needed = 0;
1356   info->factor_mallocs    = 0;
1357   info->rows_global       = (double)matin->rmap.N;
1358   info->columns_global    = (double)matin->cmap.N;
1359   info->rows_local        = (double)matin->rmap.n;
1360   info->columns_local     = (double)matin->cmap.n;
1361 
1362   PetscFunctionReturn(0);
1363 }
1364 
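/*
   Usage sketch for MatGetInfo_MPIAIJ() above (illustrative; compiled out).
   MAT_LOCAL reports this process only; MAT_GLOBAL_MAX/MAT_GLOBAL_SUM reduce
   the five counters across the communicator as implemented above.
*/
#if 0
static PetscErrorCode ExampleGetInfo(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g  nz allocated %g  mallocs %g\n",
                     info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
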
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetOption_MPIAIJ"
1367 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1368 {
1369   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   switch (op) {
1374   case MAT_NEW_NONZERO_LOCATIONS:
1375   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1376   case MAT_KEEP_ZEROED_ROWS:
1377   case MAT_NEW_NONZERO_LOCATION_ERR:
1378   case MAT_USE_INODES:
1379   case MAT_IGNORE_ZERO_ENTRIES:
1380     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1381     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1382     break;
1383   case MAT_ROW_ORIENTED:
1384     a->roworiented = flg;
1385     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1386     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1387     break;
1388   case MAT_NEW_DIAGONALS:
1389     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1390     break;
1391   case MAT_IGNORE_OFF_PROC_ENTRIES:
1392     a->donotstash = flg;
1393     break;
1394   case MAT_SYMMETRIC:
1395     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1396     break;
1397   case MAT_STRUCTURALLY_SYMMETRIC:
1398   case MAT_HERMITIAN:
1399   case MAT_SYMMETRY_ETERNAL:
1400     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1401     break;
1402   default:
1403     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
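/*
   Usage sketch for MatSetOption_MPIAIJ() above (illustrative; compiled out).
   Most options are forwarded to the diagonal (A) and off-diagonal (B) blocks.
*/
#if 0
static PetscErrorCode ExampleOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* forbid new nonzero locations once the pattern is final */
  ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  /* discard entries destined for other processes instead of stashing them */
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
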
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatGetRow_MPIAIJ"
1410 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1411 {
1412   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1413   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1414   PetscErrorCode ierr;
1415   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1416   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1417   PetscInt       *cmap,*idx_p;
1418 
1419   PetscFunctionBegin;
1420   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1421   mat->getrowactive = PETSC_TRUE;
1422 
1423   if (!mat->rowvalues && (idx || v)) {
1424     /*
1425         allocate enough space to hold information from the longest row.
1426     */
1427     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1428     PetscInt     max = 1,tmp;
1429     for (i=0; i<matin->rmap.n; i++) {
1430       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1431       if (max < tmp) { max = tmp; }
1432     }
1433     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1434     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1435   }
1436 
1437   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1438   lrow = row - rstart;
1439 
1440   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1441   if (!v)   {pvA = 0; pvB = 0;}
1442   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1443   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1444   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1445   nztot = nzA + nzB;
1446 
1447   cmap  = mat->garray;
1448   if (v  || idx) {
1449     if (nztot) {
1450       /* Sort by increasing column numbers, assuming A and B already sorted */
1451       PetscInt imark = -1;
1452       if (v) {
1453         *v = v_p = mat->rowvalues;
1454         for (i=0; i<nzB; i++) {
1455           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1456           else break;
1457         }
1458         imark = i;
1459         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1460         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1461       }
1462       if (idx) {
1463         *idx = idx_p = mat->rowindices;
1464         if (imark > -1) {
1465           for (i=0; i<imark; i++) {
1466             idx_p[i] = cmap[cworkB[i]];
1467           }
1468         } else {
1469           for (i=0; i<nzB; i++) {
1470             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1471             else break;
1472           }
1473           imark = i;
1474         }
1475         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1476         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1477       }
1478     } else {
1479       if (idx) *idx = 0;
1480       if (v)   *v   = 0;
1481     }
1482   }
1483   *nz = nztot;
1484   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1485   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 #undef __FUNCT__
1490 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1491 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1492 {
1493   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1494 
1495   PetscFunctionBegin;
1496   if (!aij->getrowactive) {
1497     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1498   }
1499   aij->getrowactive = PETSC_FALSE;
1500   PetscFunctionReturn(0);
1501 }
1502 
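/*
   Usage sketch for the MatGetRow()/MatRestoreRow() pair above (illustrative;
   compiled out).  Only locally owned rows may be requested, and each
   MatGetRow() must be matched by a MatRestoreRow() before the next call.
   The arrays are declared without const here to match the implementation
   above; newer releases const-qualify them.
*/
#if 0
static PetscErrorCode ExampleRowScan(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       row,rstart,rend,nz,*cols;
  PetscScalar    *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
    /* cols[0..nz-1] are global column indices, sorted increasingly */
    ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
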
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatNorm_MPIAIJ"
1505 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1506 {
1507   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1508   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1509   PetscErrorCode ierr;
1510   PetscInt       i,j,cstart = mat->cmap.rstart;
1511   PetscReal      sum = 0.0;
1512   MatScalar      *v;
1513 
1514   PetscFunctionBegin;
1515   if (aij->size == 1) {
1516     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1517   } else {
1518     if (type == NORM_FROBENIUS) {
1519       v = amat->a;
1520       for (i=0; i<amat->nz; i++) {
1521 #if defined(PETSC_USE_COMPLEX)
1522         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1523 #else
1524         sum += (*v)*(*v); v++;
1525 #endif
1526       }
1527       v = bmat->a;
1528       for (i=0; i<bmat->nz; i++) {
1529 #if defined(PETSC_USE_COMPLEX)
1530         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1531 #else
1532         sum += (*v)*(*v); v++;
1533 #endif
1534       }
1535       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1536       *norm = sqrt(*norm);
1537     } else if (type == NORM_1) { /* max column norm */
1538       PetscReal *tmp,*tmp2;
1539       PetscInt  *jj,*garray = aij->garray;
1540       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1541       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1542       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1543       *norm = 0.0;
1544       v = amat->a; jj = amat->j;
1545       for (j=0; j<amat->nz; j++) {
1546         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1547       }
1548       v = bmat->a; jj = bmat->j;
1549       for (j=0; j<bmat->nz; j++) {
1550         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1551       }
1552       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1553       for (j=0; j<mat->cmap.N; j++) {
1554         if (tmp2[j] > *norm) *norm = tmp2[j];
1555       }
1556       ierr = PetscFree(tmp);CHKERRQ(ierr);
1557       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1558     } else if (type == NORM_INFINITY) { /* max row norm */
1559       PetscReal ntemp = 0.0;
1560       for (j=0; j<aij->A->rmap.n; j++) {
1561         v = amat->a + amat->i[j];
1562         sum = 0.0;
1563         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1564           sum += PetscAbsScalar(*v); v++;
1565         }
1566         v = bmat->a + bmat->i[j];
1567         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1568           sum += PetscAbsScalar(*v); v++;
1569         }
1570         if (sum > ntemp) ntemp = sum;
1571       }
1572       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1573     } else {
1574       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1575     }
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
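/*
   Usage sketch for MatNorm_MPIAIJ() above (illustrative; compiled out).
   NORM_2 is not supported for this format; the routine errors out above.
*/
#if 0
static PetscErrorCode ExampleNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      nrm1,nrmf,nrmi;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);        /* largest column sum */
  ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_INFINITY,&nrmi);CHKERRQ(ierr); /* largest row sum */
  PetscFunctionReturn(0);
}
#endif
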
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatTranspose_MPIAIJ"
1582 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1583 {
1584   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1585   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1586   PetscErrorCode ierr;
1587   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1588   PetscInt       cstart=A->cmap.rstart,ncol;
1589   Mat            B;
1590   MatScalar      *array;
1591 
1592   PetscFunctionBegin;
1593   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1594 
1595   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1596     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1597     ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1598     ai = Aloc->i; aj = Aloc->j;
1599     bi = Bloc->i; bj = Bloc->j;
1600     ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1601     cols = d_nnz + na + 1; /* work space to be used by B part */
1602     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1603     for (i=0; i<ai[ma]; i++){
1604       d_nnz[aj[i]] ++;
1605       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1606     }
1607 
1608     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1609     ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1610     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1611     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1612   } else {
1613     B = *matout;
1614   }
1615 
1616   /* copy over the A part */
1617   array = Aloc->a;
1618   row = A->rmap.rstart;
1619   for (i=0; i<ma; i++) {
1620     ncol = ai[i+1]-ai[i];
1621     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1622     row++; array += ncol; aj += ncol;
1623   }
1624   aj = Aloc->j;
1625   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1626 
1627   /* copy over the B part */
1628   array = Bloc->a;
1629   row = A->rmap.rstart;
1630   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1631   for (i=0; i<mb; i++) {
1632     ncol = bi[i+1]-bi[i];
1633     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1634     row++; array += ncol; cols += ncol;
1635   }
1636   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1637   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1638   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1639   if (*matout != A) {
1640     *matout = B;
1641   } else {
1642     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1643   }
1644   PetscFunctionReturn(0);
1645 }
1646 
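/*
   Usage sketch for MatTranspose_MPIAIJ() above (illustrative; compiled out).
   This development stream passes a MatReuse flag, mirroring the implementation
   above; older releases used MatTranspose(A,&At).
*/
#if 0
static PetscErrorCode ExampleTranspose(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* out-of-place */
  ierr = MatDestroy(At);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
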
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1649 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1650 {
1651   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1652   Mat            a = aij->A,b = aij->B;
1653   PetscErrorCode ierr;
1654   PetscInt       s1,s2,s3;
1655 
1656   PetscFunctionBegin;
1657   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1658   if (rr) {
1659     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1660     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1661     /* Overlap communication with computation. */
1662     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663   }
1664   if (ll) {
1665     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1666     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1667     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1668   }
1669   /* scale the diagonal block */
1670   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1671 
1672   if (rr) {
1673     /* Do a scatter end and then right scale the off-diagonal block */
1674     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1675     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1676   }
1677 
1678   PetscFunctionReturn(0);
1679 }
1680 
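/*
   Usage sketch for MatDiagonalScale_MPIAIJ() above (illustrative; compiled
   out; assumes MatGetVecs() is available in this tree).  The left vector
   conforms to the rows and the right vector to the columns; the routine
   overlaps the scatter of rr with the left scaling.
*/
#if 0
static PetscErrorCode ExampleDiagonalScale(Mat A)
{
  PetscErrorCode ierr;
  Vec            l,r;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);     /* r: column layout, l: row layout */
  ierr = VecSet(l,2.0);CHKERRQ(ierr);
  ierr = VecSet(r,0.5);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr); /* A <- diag(l) A diag(r) */
  ierr = VecDestroy(l);CHKERRQ(ierr);
  ierr = VecDestroy(r);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
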
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1683 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1684 {
1685   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1686   PetscErrorCode ierr;
1687 
1688   PetscFunctionBegin;
1689   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1690   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1691   PetscFunctionReturn(0);
1692 }
1693 #undef __FUNCT__
1694 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1695 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1696 {
1697   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1698   PetscErrorCode ierr;
1699 
1700   PetscFunctionBegin;
1701   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatEqual_MPIAIJ"
1707 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1708 {
1709   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1710   Mat            a,b,c,d;
1711   PetscTruth     flg;
1712   PetscErrorCode ierr;
1713 
1714   PetscFunctionBegin;
1715   a = matA->A; b = matA->B;
1716   c = matB->A; d = matB->B;
1717 
1718   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1719   if (flg) {
1720     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1721   }
1722   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatCopy_MPIAIJ"
1728 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1729 {
1730   PetscErrorCode ierr;
1731   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1732   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1733 
1734   PetscFunctionBegin;
1735   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1736   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1737     /* because of the column compression in the off-processor part of the matrix a->B,
1738        the number of columns in a->B and b->B may be different, hence we cannot call
1739        the MatCopy() directly on the two parts. If need be, we can provide a more
1740        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1741        then copying the submatrices */
1742     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1743   } else {
1744     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1745     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1746   }
1747   PetscFunctionReturn(0);
1748 }
1749 
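/*
   Usage sketch for MatCopy_MPIAIJ() above (illustrative; compiled out).  With
   SAME_NONZERO_PATTERN and matching copy implementations the fast block-wise
   path is taken; otherwise MatCopy_Basic() is used.
*/
#if 0
static PetscErrorCode ExampleCopy(Mat A)
{
  PetscErrorCode ierr;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr); /* same pattern, same type */
  ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
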
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1752 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1753 {
1754   PetscErrorCode ierr;
1755 
1756   PetscFunctionBegin;
1757   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #include "petscblaslapack.h"
1762 #undef __FUNCT__
1763 #define __FUNCT__ "MatAXPY_MPIAIJ"
1764 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1765 {
1766   PetscErrorCode ierr;
1767   PetscInt       i;
1768   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1769   PetscBLASInt   bnz,one=1;
1770   Mat_SeqAIJ     *x,*y;
1771 
1772   PetscFunctionBegin;
1773   if (str == SAME_NONZERO_PATTERN) {
1774     PetscScalar alpha = a;
1775     x = (Mat_SeqAIJ *)xx->A->data;
1776     y = (Mat_SeqAIJ *)yy->A->data;
1777     bnz = PetscBLASIntCast(x->nz);
1778     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1779     x = (Mat_SeqAIJ *)xx->B->data;
1780     y = (Mat_SeqAIJ *)yy->B->data;
1781     bnz = PetscBLASIntCast(x->nz);
1782     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1783   } else if (str == SUBSET_NONZERO_PATTERN) {
1784     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1785 
1786     x = (Mat_SeqAIJ *)xx->B->data;
1787     y = (Mat_SeqAIJ *)yy->B->data;
1788     if (y->xtoy && y->XtoY != xx->B) {
1789       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1790       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1791     }
1792     if (!y->xtoy) { /* get xtoy */
1793       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1794       y->XtoY = xx->B;
1795       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1796     }
1797     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1798   } else {
1799     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1800   }
1801   PetscFunctionReturn(0);
1802 }
1803 
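/*
   Usage sketch for MatAXPY_MPIAIJ() above (illustrative; compiled out).  The
   structure flag selects the path: a raw BLAS axpy on the value arrays
   (SAME_NONZERO_PATTERN), the cached xtoy index map (SUBSET_NONZERO_PATTERN),
   or the generic MatAXPY_Basic() fallback.
*/
#if 0
static PetscErrorCode ExampleAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Y <- 2*X + Y, cheapest when both matrices share the nonzero pattern */
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
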
1804 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1805 
1806 #undef __FUNCT__
1807 #define __FUNCT__ "MatConjugate_MPIAIJ"
1808 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1809 {
1810 #if defined(PETSC_USE_COMPLEX)
1811   PetscErrorCode ierr;
1812   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1813 
1814   PetscFunctionBegin;
1815   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1816   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1817 #else
1818   PetscFunctionBegin;
1819 #endif
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatRealPart_MPIAIJ"
1825 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1826 {
1827   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1828   PetscErrorCode ierr;
1829 
1830   PetscFunctionBegin;
1831   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1832   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1833   PetscFunctionReturn(0);
1834 }
1835 
1836 #undef __FUNCT__
1837 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1838 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1839 {
1840   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1841   PetscErrorCode ierr;
1842 
1843   PetscFunctionBegin;
1844   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1845   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1846   PetscFunctionReturn(0);
1847 }
1848 
1849 #ifdef PETSC_HAVE_PBGL
1850 
1851 #include <boost/parallel/mpi/bsp_process_group.hpp>
1852 #include <boost/graph/distributed/ilu_default_graph.hpp>
1853 #include <boost/graph/distributed/ilu_0_block.hpp>
1854 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1855 #include <boost/graph/distributed/petsc/interface.hpp>
1856 #include <boost/multi_array.hpp>
1857 #include <boost/parallel/distributed_property_map.hpp>
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1861 /*
1862   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1863 */
1864 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1865 {
1866   namespace petsc = boost::distributed::petsc;
1867 
1868   namespace graph_dist = boost::graph::distributed;
1869   using boost::graph::distributed::ilu_default::process_group_type;
1870   using boost::graph::ilu_permuted;
1871 
1872   PetscTruth      row_identity, col_identity;
1873   PetscContainer  c;
1874   PetscInt        m, n, M, N;
1875   PetscErrorCode  ierr;
1876 
1877   PetscFunctionBegin;
1878   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1879   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1880   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1881   if (!row_identity || !col_identity) {
1882     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1883   }
1884 
1885   process_group_type pg;
1886   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1887   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1888   lgraph_type&   level_graph = *lgraph_p;
1889   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1890 
1891   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1892   ilu_permuted(level_graph);
1893 
1894   /* put together the new matrix */
1895   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1896   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1897   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1898   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1899   ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1900   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1901   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1902   (*fact)->factor = FACTOR_LU;
1903 
1904   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1905   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1906   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1912 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1913 {
1914   PetscFunctionBegin;
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatSolve_MPIAIJ"
1920 /*
1921   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1922 */
1923 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1924 {
1925   namespace graph_dist = boost::graph::distributed;
1926 
1927   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1928   lgraph_type*   lgraph_p;
1929   PetscContainer c;
1930   PetscErrorCode ierr;
1931 
1932   PetscFunctionBegin;
1933   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1934   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1935   ierr = VecCopy(b, x); CHKERRQ(ierr);
1936 
1937   PetscScalar* array_x;
1938   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1939   PetscInt sx;
1940   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1941 
1942   PetscScalar* array_b;
1943   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1944   PetscInt sb;
1945   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1946 
1947   lgraph_type&   level_graph = *lgraph_p;
1948   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1949 
1950   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1951   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1952                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1953 
1954   typedef boost::iterator_property_map<array_ref_type::iterator,
1955                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1956   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1957                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1958 
1959   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1960 
1961   PetscFunctionReturn(0);
1962 }
1963 #endif
1964 
1965 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1966   PetscInt       nzlocal,nsends,nrecvs;
1967   PetscMPIInt    *send_rank;
1968   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1969   PetscScalar    *sbuf_a,**rbuf_a;
1970   PetscErrorCode (*MatDestroy)(Mat);
1971 } Mat_Redundant;
1972 
1973 #undef __FUNCT__
1974 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1975 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1976 {
1977   PetscErrorCode       ierr;
1978   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1979   PetscInt             i;
1980 
1981   PetscFunctionBegin;
1982   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1983   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1984   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1985   for (i=0; i<redund->nrecvs; i++){
1986     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1987     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1988   }
1989   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1990   ierr = PetscFree(redund);CHKERRQ(ierr);
1991   PetscFunctionReturn(0);
1992 }
1993 
1994 #undef __FUNCT__
1995 #define __FUNCT__ "MatDestroy_MatRedundant"
1996 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1997 {
1998   PetscErrorCode  ierr;
1999   PetscContainer  container;
2000   Mat_Redundant   *redund=PETSC_NULL;
2001 
2002   PetscFunctionBegin;
2003   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2004   if (container) {
2005     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2006   } else {
2007     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2008   }
2009   A->ops->destroy = redund->MatDestroy;
2010   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2011   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2012   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 #undef __FUNCT__
2017 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2018 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2019 {
2020   PetscMPIInt    rank,size;
2021   MPI_Comm       comm=((PetscObject)mat)->comm;
2022   PetscErrorCode ierr;
2023   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2024   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2025   PetscInt       *rowrange=mat->rmap.range;
2026   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2027   Mat            A=aij->A,B=aij->B,C=*matredundant;
2028   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2029   PetscScalar    *sbuf_a;
2030   PetscInt       nzlocal=a->nz+b->nz;
2031   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2032   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
2033   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2034   MatScalar      *aworkA,*aworkB;
2035   PetscScalar    *vals;
2036   PetscMPIInt    tag1,tag2,tag3,imdex;
2037   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2038                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2039   MPI_Status     recv_status,*send_status;
2040   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2041   PetscInt       **rbuf_j=PETSC_NULL;
2042   PetscScalar    **rbuf_a=PETSC_NULL;
2043   Mat_Redundant  *redund=PETSC_NULL;
2044   PetscContainer container;
2045 
2046   PetscFunctionBegin;
2047   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2048   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2049 
2050   if (reuse == MAT_REUSE_MATRIX) {
2051     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2052     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2053     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2054     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2055     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2056     if (container) {
2057       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2058     } else {
2059       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2060     }
2061     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2062 
2063     nsends    = redund->nsends;
2064     nrecvs    = redund->nrecvs;
2065     send_rank = redund->send_rank; recv_rank = send_rank + size;
2066     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2067     sbuf_j    = redund->sbuf_j;
2068     sbuf_a    = redund->sbuf_a;
2069     rbuf_j    = redund->rbuf_j;
2070     rbuf_a    = redund->rbuf_a;
2071   }
2072 
2073   if (reuse == MAT_INITIAL_MATRIX){
2074     PetscMPIInt  subrank,subsize;
2075     PetscInt     nleftover,np_subcomm;
2076     /* get the destination processors' id send_rank, nsends and nrecvs */
2077     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2078     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2079     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2080     recv_rank = send_rank + size;
2081     np_subcomm = size/nsubcomm;
2082     nleftover  = size - nsubcomm*np_subcomm;
2083     nsends = 0; nrecvs = 0;
2084     for (i=0; i<size; i++){ /* i=rank*/
2085       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2086         send_rank[nsends] = i; nsends++;
2087         recv_rank[nrecvs++] = i;
2088       }
2089     }
2090     if (rank >= size - nleftover){/* this proc is a leftover processor */
2091       i = size-nleftover-1;
2092       j = 0;
2093       while (j < nsubcomm - nleftover){
2094         send_rank[nsends++] = i;
2095         i--; j++;
2096       }
2097     }
2098 
2099     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2100       for (i=0; i<nleftover; i++){
2101         recv_rank[nrecvs++] = size-nleftover+i;
2102       }
2103     }
2104 
2105     /* allocate sbuf_j, sbuf_a */
2106     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2107     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2108     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2109   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2110 
2111   /* copy mat's local entries into the buffers */
2112   if (reuse == MAT_INITIAL_MATRIX){
2113     rownz_max = 0;
2114     rptr = sbuf_j;
2115     cols = sbuf_j + rend-rstart + 1;
2116     vals = sbuf_a;
2117     rptr[0] = 0;
2118     for (i=0; i<rend-rstart; i++){
2119       row = i + rstart;
2120       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2121       ncols  = nzA + nzB;
2122       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2123       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2124       /* load the column indices for this row into cols */
2125       lwrite = 0;
2126       for (l=0; l<nzB; l++) {
2127         if ((ctmp = bmap[cworkB[l]]) < cstart){
2128           vals[lwrite]   = aworkB[l];
2129           cols[lwrite++] = ctmp;
2130         }
2131       }
2132       for (l=0; l<nzA; l++){
2133         vals[lwrite]   = aworkA[l];
2134         cols[lwrite++] = cstart + cworkA[l];
2135       }
2136       for (l=0; l<nzB; l++) {
2137         if ((ctmp = bmap[cworkB[l]]) >= cend){
2138           vals[lwrite]   = aworkB[l];
2139           cols[lwrite++] = ctmp;
2140         }
2141       }
2142       vals += ncols;
2143       cols += ncols;
2144       rptr[i+1] = rptr[i] + ncols;
2145       if (rownz_max < ncols) rownz_max = ncols;
2146     }
2147     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2148   } else { /* only copy matrix values into sbuf_a */
2149     rptr = sbuf_j;
2150     vals = sbuf_a;
2151     rptr[0] = 0;
2152     for (i=0; i<rend-rstart; i++){
2153       row = i + rstart;
2154       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2155       ncols  = nzA + nzB;
2156       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2157       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2158       lwrite = 0;
2159       for (l=0; l<nzB; l++) {
2160         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2161       }
2162       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2163       for (l=0; l<nzB; l++) {
2164         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2165       }
2166       vals += ncols;
2167       rptr[i+1] = rptr[i] + ncols;
2168     }
2169   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2170 
2171   /* send nzlocal to others, and recv other's nzlocal */
2172   /*--------------------------------------------------*/
2173   if (reuse == MAT_INITIAL_MATRIX){
2174     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2175     s_waits2 = s_waits3 + nsends;
2176     s_waits1 = s_waits2 + nsends;
2177     r_waits1 = s_waits1 + nsends;
2178     r_waits2 = r_waits1 + nrecvs;
2179     r_waits3 = r_waits2 + nrecvs;
2180   } else {
2181     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2182     r_waits3 = s_waits3 + nsends;
2183   }
2184 
2185   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2186   if (reuse == MAT_INITIAL_MATRIX){
2187     /* get new tags to keep the communication clean */
2188     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2189     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2190     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2191     rbuf_nz = sbuf_nz + nsends;
2192 
2193     /* post receives of other's nzlocal */
2194     for (i=0; i<nrecvs; i++){
2195       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2196     }
2197     /* send nzlocal to others */
2198     for (i=0; i<nsends; i++){
2199       sbuf_nz[i] = nzlocal;
2200       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2201     }
2202     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2203     count = nrecvs;
2204     while (count) {
2205       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2206       recv_rank[imdex] = recv_status.MPI_SOURCE;
2207       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2208       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2209 
2210       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2211       rbuf_nz[imdex] += i + 2;
2212       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2213       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2214       count--;
2215     }
2216     /* wait on sends of nzlocal */
2217     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2218     /* send mat->i,j to others, and recv from others */
2219     /*------------------------------------------------*/
2220     for (i=0; i<nsends; i++){
2221       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2222       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2223     }
2224     /* wait on receives of mat->i,j */
2225     /*------------------------------*/
2226     count = nrecvs;
2227     while (count) {
2228       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2229       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2230       count--;
2231     }
2232     /* wait on sends of mat->i,j */
2233     /*---------------------------*/
2234     if (nsends) {
2235       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2236     }
2237   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2238 
2239   /* post receives, send and receive mat->a */
2240   /*----------------------------------------*/
2241   for (imdex=0; imdex<nrecvs; imdex++) {
2242     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2243   }
2244   for (i=0; i<nsends; i++){
2245     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2246   }
2247   count = nrecvs;
2248   while (count) {
2249     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2250     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2251     count--;
2252   }
2253   if (nsends) {
2254     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2255   }
2256 
2257   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2258 
2259   /* create redundant matrix */
2260   /*-------------------------*/
2261   if (reuse == MAT_INITIAL_MATRIX){
2262     /* compute rownz_max for preallocation */
2263     for (imdex=0; imdex<nrecvs; imdex++){
2264       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2265       rptr = rbuf_j[imdex];
2266       for (i=0; i<j; i++){
2267         ncols = rptr[i+1] - rptr[i];
2268         if (rownz_max < ncols) rownz_max = ncols;
2269       }
2270     }
2271 
2272     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2273     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2274     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2275     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2276     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2277   } else {
2278     C = *matredundant;
2279   }
2280 
2281   /* insert local matrix entries */
2282   rptr = sbuf_j;
2283   cols = sbuf_j + rend-rstart + 1;
2284   vals = sbuf_a;
2285   for (i=0; i<rend-rstart; i++){
2286     row   = i + rstart;
2287     ncols = rptr[i+1] - rptr[i];
2288     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2289     vals += ncols;
2290     cols += ncols;
2291   }
2292   /* insert received matrix entries */
2293   for (imdex=0; imdex<nrecvs; imdex++){
2294     rstart = rowrange[recv_rank[imdex]];
2295     rend   = rowrange[recv_rank[imdex]+1];
2296     rptr = rbuf_j[imdex];
2297     cols = rbuf_j[imdex] + rend-rstart + 1;
2298     vals = rbuf_a[imdex];
2299     for (i=0; i<rend-rstart; i++){
2300       row   = i + rstart;
2301       ncols = rptr[i+1] - rptr[i];
2302       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2303       vals += ncols;
2304       cols += ncols;
2305     }
2306   }
2307   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2308   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2309   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2310   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2311   if (reuse == MAT_INITIAL_MATRIX){
2312     PetscContainer container;
2313     *matredundant = C;
2314     /* create a supporting struct and attach it to C for reuse */
2315     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2316     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2317     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2318     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2319     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2320 
2321     redund->nzlocal = nzlocal;
2322     redund->nsends  = nsends;
2323     redund->nrecvs  = nrecvs;
2324     redund->send_rank = send_rank;
2325     redund->sbuf_nz = sbuf_nz;
2326     redund->sbuf_j  = sbuf_j;
2327     redund->sbuf_a  = sbuf_a;
2328     redund->rbuf_j  = rbuf_j;
2329     redund->rbuf_a  = rbuf_a;
2330 
2331     redund->MatDestroy = C->ops->destroy;
2332     C->ops->destroy    = MatDestroy_MatRedundant;
2333   }
2334   PetscFunctionReturn(0);
2335 }
2336 
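/*
   Usage sketch for MatGetRedundantMatrix_MPIAIJ() above (illustrative;
   compiled out).  It assumes the public wrapper mirrors the signature of the
   implementation above.  With nsubcomm == size and PETSC_COMM_SELF every
   process receives a complete sequential copy; MAT_REUSE_MATRIX later
   re-communicates only the numerical values via the cached Mat_Redundant.
*/
#if 0
static PetscErrorCode ExampleRedundant(Mat A,Mat *Ared)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;
  PetscInt       N;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = MatGetSize(A,&N,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetRedundantMatrix(A,size,PETSC_COMM_SELF,N,MAT_INITIAL_MATRIX,Ared);CHKERRQ(ierr);
  /* ... A's values change ... */
  ierr = MatGetRedundantMatrix(A,size,PETSC_COMM_SELF,N,MAT_REUSE_MATRIX,Ared);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
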
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2339 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2340 {
2341   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2342   PetscInt       n      = A->rmap.n;
2343   PetscInt       cstart = A->cmap.rstart;
2344   PetscInt      *cmap   = mat->garray;
2345   PetscInt      *diagIdx, *offdiagIdx;
2346   Vec            diagV, offdiagV;
2347   PetscScalar   *a, *diagA, *offdiagA;
2348   PetscInt       r;
2349   PetscErrorCode ierr;
2350 
2351   PetscFunctionBegin;
2352   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2353   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2354   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2355   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2356   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2357   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2358   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2359   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2360   for(r = 0; r < n; ++r) {
2361     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2362       a[r]   = diagA[r];
2363       idx[r] = cstart + diagIdx[r];
2364     } else {
2365       a[r]   = offdiagA[r];
2366       idx[r] = cmap[offdiagIdx[r]];
2367     }
2368   }
2369   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2370   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2371   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2372   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2373   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2374   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 
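/*
   Usage sketch for MatGetRowMin_MPIAIJ() above (illustrative; compiled out;
   assumes MatGetVecs() is available).  The routine compares the per-row minima
   of the diagonal and off-diagonal blocks and reports the winning global
   column index in idx[].
*/
#if 0
static PetscErrorCode ExampleRowMin(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       n,*idx;
  Vec            v;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&n,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
  ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr); /* conforms to the rows of A */
  ierr = MatGetRowMin(A,v,idx);CHKERRQ(ierr);       /* idx[i]: global column of row i's minimum */
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = VecDestroy(v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
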
2378 #undef __FUNCT__
2379 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2380 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2381 {
2382   PetscErrorCode ierr;
2383 
2384   PetscFunctionBegin;
2385   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 /* -------------------------------------------------------------------*/
2390 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2391        MatGetRow_MPIAIJ,
2392        MatRestoreRow_MPIAIJ,
2393        MatMult_MPIAIJ,
2394 /* 4*/ MatMultAdd_MPIAIJ,
2395        MatMultTranspose_MPIAIJ,
2396        MatMultTransposeAdd_MPIAIJ,
2397 #ifdef PETSC_HAVE_PBGL
2398        MatSolve_MPIAIJ,
2399 #else
2400        0,
2401 #endif
2402        0,
2403        0,
2404 /*10*/ 0,
2405        0,
2406        0,
2407        MatRelax_MPIAIJ,
2408        MatTranspose_MPIAIJ,
2409 /*15*/ MatGetInfo_MPIAIJ,
2410        MatEqual_MPIAIJ,
2411        MatGetDiagonal_MPIAIJ,
2412        MatDiagonalScale_MPIAIJ,
2413        MatNorm_MPIAIJ,
2414 /*20*/ MatAssemblyBegin_MPIAIJ,
2415        MatAssemblyEnd_MPIAIJ,
2416        0,
2417        MatSetOption_MPIAIJ,
2418        MatZeroEntries_MPIAIJ,
2419 /*25*/ MatZeroRows_MPIAIJ,
2420        0,
2421 #ifdef PETSC_HAVE_PBGL
2422        MatLUFactorNumeric_MPIAIJ,
2423 #else
2424        0,
2425 #endif
2426        0,
2427        0,
2428 /*30*/ MatSetUpPreallocation_MPIAIJ,
2429 #ifdef PETSC_HAVE_PBGL
2430        MatILUFactorSymbolic_MPIAIJ,
2431 #else
2432        0,
2433 #endif
2434        0,
2435        0,
2436        0,
2437 /*35*/ MatDuplicate_MPIAIJ,
2438        0,
2439        0,
2440        0,
2441        0,
2442 /*40*/ MatAXPY_MPIAIJ,
2443        MatGetSubMatrices_MPIAIJ,
2444        MatIncreaseOverlap_MPIAIJ,
2445        MatGetValues_MPIAIJ,
2446        MatCopy_MPIAIJ,
2447 /*45*/ 0,
2448        MatScale_MPIAIJ,
2449        0,
2450        0,
2451        0,
2452 /*50*/ MatSetBlockSize_MPIAIJ,
2453        0,
2454        0,
2455        0,
2456        0,
2457 /*55*/ MatFDColoringCreate_MPIAIJ,
2458        0,
2459        MatSetUnfactored_MPIAIJ,
2460        MatPermute_MPIAIJ,
2461        0,
2462 /*60*/ MatGetSubMatrix_MPIAIJ,
2463        MatDestroy_MPIAIJ,
2464        MatView_MPIAIJ,
2465        0,
2466        0,
2467 /*65*/ 0,
2468        0,
2469        0,
2470        0,
2471        0,
2472 /*70*/ 0,
2473        0,
2474        MatSetColoring_MPIAIJ,
2475 #if defined(PETSC_HAVE_ADIC)
2476        MatSetValuesAdic_MPIAIJ,
2477 #else
2478        0,
2479 #endif
2480        MatSetValuesAdifor_MPIAIJ,
2481 /*75*/ 0,
2482        0,
2483        0,
2484        0,
2485        0,
2486 /*80*/ 0,
2487        0,
2488        0,
2489        0,
2490 /*84*/ MatLoad_MPIAIJ,
2491        0,
2492        0,
2493        0,
2494        0,
2495        0,
2496 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2497        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2498        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2499        MatPtAP_Basic,
2500        MatPtAPSymbolic_MPIAIJ,
2501 /*95*/ MatPtAPNumeric_MPIAIJ,
2502        0,
2503        0,
2504        0,
2505        0,
2506 /*100*/0,
2507        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2508        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2509        MatConjugate_MPIAIJ,
2510        0,
2511 /*105*/MatSetValuesRow_MPIAIJ,
2512        MatRealPart_MPIAIJ,
2513        MatImaginaryPart_MPIAIJ,
2514        0,
2515        0,
2516 /*110*/0,
2517        MatGetRedundantMatrix_MPIAIJ,
2518        MatGetRowMin_MPIAIJ,
2519        0,
2520        0,
2521 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2522 
2523 /* ----------------------------------------------------------------------------------------*/
2524 
2525 EXTERN_C_BEGIN
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2528 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2529 {
2530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2531   PetscErrorCode ierr;
2532 
2533   PetscFunctionBegin;
2534   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2535   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 EXTERN_C_END
2539 
2540 EXTERN_C_BEGIN
2541 #undef __FUNCT__
2542 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2543 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2544 {
2545   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2546   PetscErrorCode ierr;
2547 
2548   PetscFunctionBegin;
2549   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2550   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2551   PetscFunctionReturn(0);
2552 }
2553 EXTERN_C_END
2554 
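/*
   Usage sketch for the MatStoreValues()/MatRetrieveValues() pair above
   (illustrative; compiled out).  The nonzero pattern must be frozen first so
   the cached value arrays of both blocks stay aligned with the matrix.
*/
#if 0
static PetscErrorCode ExampleStoreRetrieve(Mat J)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(J);CHKERRQ(ierr);
  /* ... overwrite parts of J, e.g. to impose boundary conditions ... */
  ierr = MatRetrieveValues(J);CHKERRQ(ierr); /* restore the cached values */
  PetscFunctionReturn(0);
}
#endif
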
2555 #include "petscpc.h"
2556 EXTERN_C_BEGIN
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2559 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2560 {
2561   Mat_MPIAIJ     *b;
2562   PetscErrorCode ierr;
2563   PetscInt       i;
2564 
2565   PetscFunctionBegin;
2566   B->preallocated = PETSC_TRUE;
2567   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2568   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2569   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2570   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2571 
2572   B->rmap.bs = B->cmap.bs = 1;
2573   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2574   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2575   if (d_nnz) {
2576     for (i=0; i<B->rmap.n; i++) {
2577       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2578     }
2579   }
2580   if (o_nnz) {
2581     for (i=0; i<B->rmap.n; i++) {
2582       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2583     }
2584   }
2585   b = (Mat_MPIAIJ*)B->data;
2586 
2587   /* Explicitly create 2 MATSEQAIJ matrices. */
2588   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2589   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2590   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2591   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2592   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2593   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2594   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2595   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2596 
2597   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2598   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2599 
2600   PetscFunctionReturn(0);
2601 }
2602 EXTERN_C_END
2603 
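/*
   Usage sketch for MatMPIAIJSetPreallocation_MPIAIJ() above (illustrative;
   compiled out).  d_nz/o_nz bound the nonzeros per row in the diagonal and
   off-diagonal blocks; per-row arrays d_nnz/o_nnz may be passed instead.
*/
#if 0
static PetscErrorCode ExamplePreallocate(MPI_Comm comm,PetscInt m,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* at most 5 nonzeros per row in the diagonal block, 2 in the off-diagonal block */
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
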
2604 #undef __FUNCT__
2605 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2606 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2607 {
2608   Mat            mat;
2609   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2610   PetscErrorCode ierr;
2611 
2612   PetscFunctionBegin;
2613   *newmat       = 0;
2614   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2615   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2616   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2617   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2618   a    = (Mat_MPIAIJ*)mat->data;
2619 
2620   mat->factor       = matin->factor;
2621   mat->rmap.bs      = matin->rmap.bs;
2622   mat->assembled    = PETSC_TRUE;
2623   mat->insertmode   = NOT_SET_VALUES;
2624   mat->preallocated = PETSC_TRUE;
2625 
2626   a->size           = oldmat->size;
2627   a->rank           = oldmat->rank;
2628   a->donotstash     = oldmat->donotstash;
2629   a->roworiented    = oldmat->roworiented;
2630   a->rowindices     = 0;
2631   a->rowvalues      = 0;
2632   a->getrowactive   = PETSC_FALSE;
2633 
2634   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2635   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2636 
2637   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2638   if (oldmat->colmap) {
2639 #if defined (PETSC_USE_CTABLE)
2640     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2641 #else
2642     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2643     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2644     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2645 #endif
2646   } else a->colmap = 0;
2647   if (oldmat->garray) {
2648     PetscInt len;
2649     len  = oldmat->B->cmap.n;
2650     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2651     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2652     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2653   } else a->garray = 0;
2654 
2655   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2656   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2657   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2658   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2659   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2660   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2661   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2662   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2663   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2664   *newmat = mat;
2665   PetscFunctionReturn(0);
2666 }
2667 
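/*
   Usage sketch for MatDuplicate_MPIAIJ() above (illustrative; compiled out).
   The copy shares nothing with the original: layout, colmap, garray, lvec and
   the scatter context are all duplicated as shown above.
*/
#if 0
static PetscErrorCode ExampleDuplicate(Mat A)
{
  PetscErrorCode ierr;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
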
2668 #include "petscsys.h"
2669 
2670 #undef __FUNCT__
2671 #define __FUNCT__ "MatLoad_MPIAIJ"
2672 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2673 {
2674   Mat            A;
2675   PetscScalar    *vals,*svals;
2676   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2677   MPI_Status     status;
2678   PetscErrorCode ierr;
2679   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2680   PetscInt       i,nz,j,rstart,rend,mmax;
2681   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2682   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2683   PetscInt       cend,cstart,n,*rowners;
2684   int            fd;
2685 
2686   PetscFunctionBegin;
2687   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2688   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2689   if (!rank) {
2690     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2691     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2692     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2693   }
2694 
2695   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2696   M = header[1]; N = header[2];
2697   /* determine ownership of all rows */
2698   m    = M/size + ((M % size) > rank);
2699   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2700   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2701 
2702   /* First process needs enough room for process with most rows */
2703   if (!rank) {
2704     mmax       = rowners[1];
2705     for (i=2; i<=size; i++) {
2706       mmax = PetscMax(mmax,rowners[i]);
2707     }
2708   } else mmax = m;
2709 
2710   rowners[0] = 0;
2711   for (i=2; i<=size; i++) {
2712     rowners[i] += rowners[i-1];
2713   }
2714   rstart = rowners[rank];
2715   rend   = rowners[rank+1];
2716 
2717   /* distribute row lengths to all processors */
2718   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2719   if (!rank) {
2720     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2721     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2722     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2723     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2724     for (j=0; j<m; j++) {
2725       procsnz[0] += ourlens[j];
2726     }
2727     for (i=1; i<size; i++) {
2728       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2729       /* calculate the number of nonzeros on each processor */
2730       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2731         procsnz[i] += rowlengths[j];
2732       }
2733       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2734     }
2735     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2736   } else {
2737     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2738   }
2739 
2740   if (!rank) {
2741     /* determine max buffer needed and allocate it */
2742     maxnz = 0;
2743     for (i=0; i<size; i++) {
2744       maxnz = PetscMax(maxnz,procsnz[i]);
2745     }
2746     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2747 
2748     /* read in my part of the matrix column indices  */
2749     nz   = procsnz[0];
2750     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2751     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2752 
2753     /* read in everyone else's part and ship it off */
2754     for (i=1; i<size; i++) {
2755       nz   = procsnz[i];
2756       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2757       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2758     }
2759     ierr = PetscFree(cols);CHKERRQ(ierr);
2760   } else {
2761     /* determine buffer space needed for message */
2762     nz = 0;
2763     for (i=0; i<m; i++) {
2764       nz += ourlens[i];
2765     }
2766     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2767 
2768     /* receive message of column indices*/
2769     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2770     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2771     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Number of column indices received does not match the row lengths in the file");
2772   }
2773 
2774   /* determine column ownership if matrix is not square */
2775   if (N != M) {
2776     n      = N/size + ((N % size) > rank);
2777     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2778     cstart = cend - n;
2779   } else {
2780     cstart = rstart;
2781     cend   = rend;
2782     n      = cend - cstart;
2783   }
2784 
2785   /* loop over local rows, determining number of off diagonal entries */
2786   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2787   jj = 0;
2788   for (i=0; i<m; i++) {
2789     for (j=0; j<ourlens[i]; j++) {
2790       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2791       jj++;
2792     }
2793   }
2794 
2795   /* create our matrix */
2796   for (i=0; i<m; i++) {
2797     ourlens[i] -= offlens[i];
2798   }
2799   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2800   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2801   ierr = MatSetType(A,type);CHKERRQ(ierr);
2802   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2803 
2804   for (i=0; i<m; i++) {
2805     ourlens[i] += offlens[i];
2806   }
2807 
2808   if (!rank) {
2809     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2810 
2811     /* read in my part of the matrix numerical values  */
2812     nz   = procsnz[0];
2813     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2814 
2815     /* insert into matrix */
2816     jj      = rstart;
2817     smycols = mycols;
2818     svals   = vals;
2819     for (i=0; i<m; i++) {
2820       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2821       smycols += ourlens[i];
2822       svals   += ourlens[i];
2823       jj++;
2824     }
2825 
2826     /* read in other processors and ship out */
2827     for (i=1; i<size; i++) {
2828       nz   = procsnz[i];
2829       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2830       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2831     }
2832     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2833   } else {
2834     /* receive numeric values */
2835     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2836 
2837     /* receive message of values*/
2838     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2839     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2840     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Number of values received does not match the row lengths in the file");
2841 
2842     /* insert into matrix */
2843     jj      = rstart;
2844     smycols = mycols;
2845     svals   = vals;
2846     for (i=0; i<m; i++) {
2847       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2848       smycols += ourlens[i];
2849       svals   += ourlens[i];
2850       jj++;
2851     }
2852   }
2853   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2854   ierr = PetscFree(vals);CHKERRQ(ierr);
2855   ierr = PetscFree(mycols);CHKERRQ(ierr);
2856   ierr = PetscFree(rowners);CHKERRQ(ierr);
2857 
2858   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2859   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2860   *newmat = A;
2861   PetscFunctionReturn(0);
2862 }
2863 
2864 #undef __FUNCT__
2865 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2866 /*
2867     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2868   matrix, then the end result formed by concatenating the local matrices.
2869   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2870 */
2871 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2872 {
2873   PetscErrorCode ierr;
2874   PetscMPIInt    rank,size;
2875   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2876   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2877   Mat            *local,M,Mreuse;
2878   MatScalar      *vwork,*aa;
2879   MPI_Comm       comm = ((PetscObject)mat)->comm;
2880   Mat_SeqAIJ     *aij;
2881 
2882 
2883   PetscFunctionBegin;
2884   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2885   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2886 
2887   if (call ==  MAT_REUSE_MATRIX) {
2888     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2889     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2890     local = &Mreuse;
2891     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2892   } else {
2893     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2894     Mreuse = *local;
2895     ierr   = PetscFree(local);CHKERRQ(ierr);
2896   }
2897 
2898   /*
2899       m - number of local rows
2900       n - number of columns (same on all processors)
2901       rstart - first row in new global matrix generated
2902   */
2903   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2904   if (call == MAT_INITIAL_MATRIX) {
2905     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2906     ii  = aij->i;
2907     jj  = aij->j;
2908 
2909     /*
2910         Determine the number of non-zeros in the diagonal and off-diagonal
2911         portions of the matrix in order to do correct preallocation
2912     */
2913 
2914     /* first get start and end of "diagonal" columns */
2915     if (csize == PETSC_DECIDE) {
2916       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2917       if (mglobal == n) { /* square matrix */
2918 	nlocal = m;
2919       } else {
2920         nlocal = n/size + ((n % size) > rank);
2921       }
2922     } else {
2923       nlocal = csize;
2924     }
2925     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2926     rstart = rend - nlocal;
2927     if (rank == size - 1 && rend != n) {
2928       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2929     }
2930 
2931     /* next, compute all the lengths */
2932     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2933     olens = dlens + m;
2934     for (i=0; i<m; i++) {
2935       jend = ii[i+1] - ii[i];
2936       olen = 0;
2937       dlen = 0;
2938       for (j=0; j<jend; j++) {
2939         if (*jj < rstart || *jj >= rend) olen++;
2940         else dlen++;
2941         jj++;
2942       }
2943       olens[i] = olen;
2944       dlens[i] = dlen;
2945     }
2946     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2947     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2948     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2949     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2950     ierr = PetscFree(dlens);CHKERRQ(ierr);
2951   } else {
2952     PetscInt ml,nl;
2953 
2954     M = *newmat;
2955     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2956     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2957     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2958     /*
2959          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2960        rather than the slower MatSetValues().
2961     */
2962     M->was_assembled = PETSC_TRUE;
2963     M->assembled     = PETSC_FALSE;
2964   }
2965   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2966   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2967   ii  = aij->i;
2968   jj  = aij->j;
2969   aa  = aij->a;
2970   for (i=0; i<m; i++) {
2971     row   = rstart + i;
2972     nz    = ii[i+1] - ii[i];
2973     cwork = jj;     jj += nz;
2974     vwork = aa;     aa += nz;
2975     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2976   }
2977 
2978   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2979   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2980   *newmat = M;
2981 
2982   /* save submatrix used in processor for next request */
2983   if (call ==  MAT_INITIAL_MATRIX) {
2984     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2985     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2986   }
2987 
2988   PetscFunctionReturn(0);
2989 }
2990 
2991 EXTERN_C_BEGIN
2992 #undef __FUNCT__
2993 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2994 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2995 {
2996   PetscInt       m,cstart, cend,j,nnz,i,d;
2997   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2998   const PetscInt *JJ;
2999   PetscScalar    *values;
3000   PetscErrorCode ierr;
3001 
3002   PetscFunctionBegin;
3003   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 but it is %D",Ii[0]);
3004 
3005   B->rmap.bs = B->cmap.bs = 1;
3006   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
3007   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
3008   m      = B->rmap.n;
3009   cstart = B->cmap.rstart;
3010   cend   = B->cmap.rend;
3011   rstart = B->rmap.rstart;
3012 
3013   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3014   o_nnz = d_nnz + m;
3015 
3016 #if defined(PETSC_USE_DEBUG)
3017   for (i=0; i<m; i++) {
3018     nnz     = Ii[i+1]- Ii[i];
3019     JJ      = J + Ii[i];
3020     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3021     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3022     if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
3023     for (j=1; j<nnz; j++) {
3024       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at location %D in its column indices",i,j);
3025     }
3026   }
3027 #endif
3028 
3029   for (i=0; i<m; i++) {
3030     nnz     = Ii[i+1]- Ii[i];
3031     JJ      = J + Ii[i];
3032     nnz_max = PetscMax(nnz_max,nnz);
3033     for (j=0; j<nnz; j++) {
3034       if (*JJ >= cstart) break;
3035       JJ++;
3036     }
3037     d = 0;
3038     for (; j<nnz; j++) {
3039       if (*JJ++ >= cend) break;
3040       d++;
3041     }
3042     d_nnz[i] = d;
3043     o_nnz[i] = nnz - d;
3044   }
3045   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3046   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3047 
3048   if (v) values = (PetscScalar*)v;
3049   else {
3050     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3051     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3052   }
3053 
3054   for (i=0; i<m; i++) {
3055     ii   = i + rstart;
3056     nnz  = Ii[i+1]- Ii[i];
3057     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3058   }
3059   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3060   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3061 
3062   if (!v) {
3063     ierr = PetscFree(values);CHKERRQ(ierr);
3064   }
3065   PetscFunctionReturn(0);
3066 }
3067 EXTERN_C_END
3068 
3069 #undef __FUNCT__
3070 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3071 /*@
3072    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3073    (the default parallel PETSc format).
3074 
3075    Collective on MPI_Comm
3076 
3077    Input Parameters:
3078 +  B - the matrix
3079 .  i - the indices into j for the start of each local row (starts with zero)
3080 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
3081 -  v - optional values in the matrix
3082 
3083    Level: developer
3084 
3085    Notes:
3086        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3087      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3088      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3089 
3090        The i and j indices are 0-based, and the i entries are offsets into the local j array.
3091 
3092        The format used for the sparse matrix input is equivalent to a
3093     row-major ordering, i.e. for the following matrix the expected input data is
3094     as shown:
3095 
3096         1 0 0
3097         2 0 3     P0
3098        -------
3099         4 5 6     P1
3100 
3101      Process0 [P0]: rows_owned=[0,1]
3102         i =  {0,1,3}  [size = nrow+1  = 2+1]
3103         j =  {0,0,2}  [size = nz = 3]
3104         v =  {1,2,3}  [size = nz = 3]
3105 
3106      Process1 [P1]: rows_owned=[2]
3107         i =  {0,3}    [size = nrow+1  = 1+1]
3108         j =  {0,1,2}  [size = nz = 3]
3109         v =  {4,5,6}  [size = nz = 3]
3110 
3111       The column indices for each row MUST be sorted.
3112 
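   Example usage (a minimal sketch; comm, m, n and the local CSR arrays i, j, v are
   assumed to be already set up as described above):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
     /* B is assembled on return, so it may be used immediately */
.ve
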
3113 .keywords: matrix, aij, compressed row, sparse, parallel
3114 
3115 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3116           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3117 @*/
3118 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3119 {
3120   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3121 
3122   PetscFunctionBegin;
3123   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3124   if (f) {
3125     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3126   }
3127   PetscFunctionReturn(0);
3128 }
3129 
3130 #undef __FUNCT__
3131 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3132 /*@C
3133    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3134    (the default parallel PETSc format).  For good matrix assembly performance
3135    the user should preallocate the matrix storage by setting the parameters
3136    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3137    performance can be increased by more than a factor of 50.
3138 
3139    Collective on MPI_Comm
3140 
3141    Input Parameters:
3142 +  A - the matrix
3143 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3144            (same value is used for all local rows)
3145 .  d_nnz - array containing the number of nonzeros in the various rows of the
3146            DIAGONAL portion of the local submatrix (possibly different for each row)
3147            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3148            The size of this array is equal to the number of local rows, i.e 'm'.
3149            You must leave room for the diagonal entry even if it is zero.
3150 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3151            submatrix (same value is used for all local rows).
3152 -  o_nnz - array containing the number of nonzeros in the various rows of the
3153            OFF-DIAGONAL portion of the local submatrix (possibly different for
3154            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3155            structure. The size of this array is equal to the number
3156            of local rows, i.e 'm'.
3157 
3158    If the *_nnz parameter is given then the *_nz parameter is ignored
3159 
3160    The AIJ format (also called the Yale sparse matrix format or
3161    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3162    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3163 
3164    The parallel matrix is partitioned such that the first m0 rows belong to
3165    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3166    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
3167 
3168    The DIAGONAL portion of the local submatrix of a processor can be defined
3169    as the submatrix obtained by extracting the part corresponding
3170    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3171    first row that belongs to the processor and r2 is the last row belonging
3172    to this processor. This is a square mxm matrix. The remaining portion
3173    of the local submatrix (m x (N-m)) constitutes the OFF-DIAGONAL portion.
3174 
3175    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3176 
3177    You can call MatGetInfo() to get information on how effective the preallocation was;
3178    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3179    You can also run with the option -info and look for messages with the string
3180    malloc in them to see if additional memory allocation was needed.
3181 
3182    Example usage:
3183 
3184    Consider the following 8x8 matrix with 34 non-zero values, that is
3185    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3186    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3187    as follows:
3188 
3189 .vb
3190             1  2  0  |  0  3  0  |  0  4
3191     Proc0   0  5  6  |  7  0  0  |  8  0
3192             9  0 10  | 11  0  0  | 12  0
3193     -------------------------------------
3194            13  0 14  | 15 16 17  |  0  0
3195     Proc1   0 18  0  | 19 20 21  |  0  0
3196             0  0  0  | 22 23  0  | 24  0
3197     -------------------------------------
3198     Proc2  25 26 27  |  0  0 28  | 29  0
3199            30  0  0  | 31 32 33  |  0 34
3200 .ve
3201 
3202    This can be represented as a collection of submatrices as:
3203 
3204 .vb
3205       A B C
3206       D E F
3207       G H I
3208 .ve
3209 
3210    Where the submatrices A,B,C are owned by proc0, D,E,F are
3211    owned by proc1, G,H,I are owned by proc2.
3212 
3213    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3214    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3215    The 'M','N' parameters are 8,8, and have the same values on all procs.
3216 
3217    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3218    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3219    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3220    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3221    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3222    matrix and [DF] as another SeqAIJ matrix.
3223 
3224    When d_nz, o_nz parameters are specified, d_nz storage elements are
3225    allocated for every row of the local diagonal submatrix, and o_nz
3226    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3227    One way to choose d_nz and o_nz is to use the maximum number of nonzeros in
3228    any local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3229    In this case, the values of d_nz,o_nz are:
3230 .vb
3231      proc0 : d_nz = 2, o_nz = 2
3232      proc1 : d_nz = 3, o_nz = 2
3233      proc2 : d_nz = 1, o_nz = 4
3234 .ve
3235    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3236    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3237    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3238    34 values.
3239 
3240    When d_nnz, o_nnz parameters are specified, the storage is specified
3241    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3242    In the above case the values for d_nnz,o_nnz are:
3243 .vb
3244      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3245      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3246      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3247 .ve
3248    Here the space allocated is the sum of all of the above values, i.e. 34, and
3249    hence the preallocation is perfect.
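
   A sketch of the corresponding call on proc0 (hypothetical variable names; A has
   already been created with local sizes 3 x 3 and type MPIAIJ):

.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve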
3250 
3251    Level: intermediate
3252 
3253 .keywords: matrix, aij, compressed row, sparse, parallel
3254 
3255 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3256           MPIAIJ, MatGetInfo()
3257 @*/
3258 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3259 {
3260   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3261 
3262   PetscFunctionBegin;
3263   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3264   if (f) {
3265     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3266   }
3267   PetscFunctionReturn(0);
3268 }
3269 
3270 #undef __FUNCT__
3271 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3272 /*@
3273      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3274          CSR format the local rows.
3275 
3276    Collective on MPI_Comm
3277 
3278    Input Parameters:
3279 +  comm - MPI communicator
3280 .  m - number of local rows (Cannot be PETSC_DECIDE)
3281 .  n - This value should be the same as the local size used in creating the
3282        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3283        calculated if N is given) For square matrices n is almost always m.
3284 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3285 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3286 .   i - row indices
3287 .   j - column indices
3288 -   a - matrix values
3289 
3290    Output Parameter:
3291 .   mat - the matrix
3292 
3293    Level: intermediate
3294 
3295    Notes:
3296        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3297      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3298      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3299 
3300        The i and j indices are 0-based, and the i entries are offsets into the local j array.
3301 
3302        The format used for the sparse matrix input is equivalent to a
3303     row-major ordering, i.e. for the following matrix the expected input data is
3304     as shown:
3305 
3306         1 0 0
3307         2 0 3     P0
3308        -------
3309         4 5 6     P1
3310 
3311      Process0 [P0]: rows_owned=[0,1]
3312         i =  {0,1,3}  [size = nrow+1  = 2+1]
3313         j =  {0,0,2}  [size = nz = 3]
3314         v =  {1,2,3}  [size = nz = 3]
3315 
3316      Process1 [P1]: rows_owned=[2]
3317         i =  {0,3}    [size = nrow+1  = 1+1]
3318         j =  {0,1,2}  [size = nz = 3]
3319         v =  {4,5,6}  [size = nz = 3]
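
   Example usage (a minimal sketch; i, j, a are the local CSR arrays shown above):

.vb
     Mat mat;
     MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);
     /* ... use mat ... */
     MatDestroy(mat);
.ve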
3320 
3321 .keywords: matrix, aij, compressed row, sparse, parallel
3322 
3323 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3324           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3325 @*/
3326 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3327 {
3328   PetscErrorCode ierr;
3329 
3330   PetscFunctionBegin;
3331   if (i[0]) {
3332     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3333   }
3334   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3335   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3336   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3337   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3338   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3339   PetscFunctionReturn(0);
3340 }
3341 
3342 #undef __FUNCT__
3343 #define __FUNCT__ "MatCreateMPIAIJ"
3344 /*@C
3345    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3346    (the default parallel PETSc format).  For good matrix assembly performance
3347    the user should preallocate the matrix storage by setting the parameters
3348    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3349    performance can be increased by more than a factor of 50.
3350 
3351    Collective on MPI_Comm
3352 
3353    Input Parameters:
3354 +  comm - MPI communicator
3355 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3356            This value should be the same as the local size used in creating the
3357            y vector for the matrix-vector product y = Ax.
3358 .  n - This value should be the same as the local size used in creating the
3359        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3360        calculated if N is given) For square matrices n is almost always m.
3361 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3362 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3363 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3364            (same value is used for all local rows)
3365 .  d_nnz - array containing the number of nonzeros in the various rows of the
3366            DIAGONAL portion of the local submatrix (possibly different for each row)
3367            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3368            The size of this array is equal to the number of local rows, i.e 'm'.
3369            You must leave room for the diagonal entry even if it is zero.
3370 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3371            submatrix (same value is used for all local rows).
3372 -  o_nnz - array containing the number of nonzeros in the various rows of the
3373            OFF-DIAGONAL portion of the local submatrix (possibly different for
3374            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3375            structure. The size of this array is equal to the number
3376            of local rows, i.e 'm'.
3377 
3378    Output Parameter:
3379 .  A - the matrix
3380 
3381    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3382    MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
3383    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3384    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3385 
3386    Notes:
3387    If the *_nnz parameter is given then the *_nz parameter is ignored
3388 
3389    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3390    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3391    storage requirements for this matrix.
3392 
3393    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3394    processor than it must be used on all processors that share the object for
3395    that argument.
3396 
3397    The user MUST specify either the local or global matrix dimensions
3398    (possibly both).
3399 
3400    The parallel matrix is partitioned across processors such that the
3401    first m0 rows belong to process 0, the next m1 rows belong to
3402    process 1, the next m2 rows belong to process 2, etc., where
3403    m0,m1,m2,... are the values of the input parameter 'm', i.e. each processor stores
3404    values corresponding to an [m x N] submatrix.
3405 
3406    The columns are logically partitioned with the n0 columns belonging
3407    to 0th partition, the next n1 columns belonging to the next
3408    partition, etc., where n0,n1,n2,... are the values of the input parameter 'n'.
3409 
3410    The DIAGONAL portion of the local submatrix on any given processor
3411    is the submatrix corresponding to the rows and columns m,n
3412    owned by the given processor, i.e. the diagonal matrix on
3413    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3414    etc. The remaining portion of the local submatrix [m x (N-n)]
3415    constitute the OFF-DIAGONAL portion. The example below better
3416    illustrates this concept.
3417 
3418    For a square global matrix we define each processor's diagonal portion
3419    to be its local rows and the corresponding columns (a square submatrix);
3420    each processor's off-diagonal portion encompasses the remainder of the
3421    local matrix (a rectangular submatrix).
3422 
3423    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3424 
3425    When calling this routine with a single process communicator, a matrix of
3426    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3427    type of communicator, use the construction mechanism:
3428      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3429 
3430    By default, this format uses inodes (identical nodes) when possible.
3431    We search for consecutive rows with the same nonzero structure, thereby
3432    reusing matrix information to achieve increased efficiency.
3433 
3434    Options Database Keys:
3435 +  -mat_no_inode  - Do not use inodes
3436 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3437 -  -mat_aij_oneindex - Internally use indexing starting at 1
3438         rather than 0.  Note that when calling MatSetValues(),
3439         the user still MUST index entries starting at 0!
3440 
3441 
3442    Example usage:
3443 
3444    Consider the following 8x8 matrix with 34 non-zero values, that is
3445    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3446    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3447    as follows:
3448 
3449 .vb
3450             1  2  0  |  0  3  0  |  0  4
3451     Proc0   0  5  6  |  7  0  0  |  8  0
3452             9  0 10  | 11  0  0  | 12  0
3453     -------------------------------------
3454            13  0 14  | 15 16 17  |  0  0
3455     Proc1   0 18  0  | 19 20 21  |  0  0
3456             0  0  0  | 22 23  0  | 24  0
3457     -------------------------------------
3458     Proc2  25 26 27  |  0  0 28  | 29  0
3459            30  0  0  | 31 32 33  |  0 34
3460 .ve
3461 
3462    This can be represented as a collection of submatrices as:
3463 
3464 .vb
3465       A B C
3466       D E F
3467       G H I
3468 .ve
3469 
3470    Where the submatrices A,B,C are owned by proc0, D,E,F are
3471    owned by proc1, G,H,I are owned by proc2.
3472 
3473    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3474    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3475    The 'M','N' parameters are 8,8, and have the same values on all procs.
3476 
3477    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3478    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3479    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3480    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3481    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3482    matrix and [DF] as another SeqAIJ matrix.
3483 
3484    When d_nz, o_nz parameters are specified, d_nz storage elements are
3485    allocated for every row of the local diagonal submatrix, and o_nz
3486    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3487    One way to choose d_nz and o_nz is to use the maximum number of nonzeros in
3488    any local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3489    In this case, the values of d_nz,o_nz are:
3490 .vb
3491      proc0 : d_nz = 2, o_nz = 2
3492      proc1 : d_nz = 3, o_nz = 2
3493      proc2 : d_nz = 1, o_nz = 4
3494 .ve
3495    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3496    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3497    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3498    34 values.
3499 
3500    When d_nnz, o_nnz parameters are specified, the storage is specified
3501    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3502    In the above case the values for d_nnz,o_nnz are:
3503 .vb
3504      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3505      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3506      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3507 .ve
3508    Here the space allocated is the sum of all of the above values, i.e. 34, and
3509    hence the preallocation is perfect.
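
   A sketch of the corresponding call on proc0 in the example above (each process
   passes its own m, n, d_nnz and o_nnz; hypothetical variable names):

.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     Mat      A;
     MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve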
3510 
3511    Level: intermediate
3512 
3513 .keywords: matrix, aij, compressed row, sparse, parallel
3514 
3515 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3516           MPIAIJ, MatCreateMPIAIJWithArrays()
3517 @*/
3518 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3519 {
3520   PetscErrorCode ierr;
3521   PetscMPIInt    size;
3522 
3523   PetscFunctionBegin;
3524   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3525   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3527   if (size > 1) {
3528     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3529     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3530   } else {
3531     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3532     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3533   }
3534   PetscFunctionReturn(0);
3535 }
3536 
3537 #undef __FUNCT__
3538 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
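/*
    Returns the diagonal block (Ad), the off-diagonal block (Ao), and the mapping
  (colmap) from the local columns of Ao to the global column numbers of the
  MPIAIJ matrix A.
*/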
3539 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3540 {
3541   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3542 
3543   PetscFunctionBegin;
3544   *Ad     = a->A;
3545   *Ao     = a->B;
3546   *colmap = a->garray;
3547   PetscFunctionReturn(0);
3548 }
3549 
3550 #undef __FUNCT__
3551 #define __FUNCT__ "MatSetColoring_MPIAIJ"
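/*
    Sets the coloring on both the diagonal (a->A) and off-diagonal (a->B) SeqAIJ blocks.
  For IS_COLORING_GLOBAL the colors are first gathered onto every process; for
  IS_COLORING_GHOSTED they are mapped through the matrix's local-to-global mapping.
*/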
3552 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3553 {
3554   PetscErrorCode ierr;
3555   PetscInt       i;
3556   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3557 
3558   PetscFunctionBegin;
3559   if (coloring->ctype == IS_COLORING_GLOBAL) {
3560     ISColoringValue *allcolors,*colors;
3561     ISColoring      ocoloring;
3562 
3563     /* set coloring for diagonal portion */
3564     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3565 
3566     /* set coloring for off-diagonal portion */
3567     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3568     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3569     for (i=0; i<a->B->cmap.n; i++) {
3570       colors[i] = allcolors[a->garray[i]];
3571     }
3572     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3573     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3574     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3575     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3576   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3577     ISColoringValue *colors;
3578     PetscInt        *larray;
3579     ISColoring      ocoloring;
3580 
3581     /* set coloring for diagonal portion */
3582     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3583     for (i=0; i<a->A->cmap.n; i++) {
3584       larray[i] = i + A->cmap.rstart;
3585     }
3586     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3587     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3588     for (i=0; i<a->A->cmap.n; i++) {
3589       colors[i] = coloring->colors[larray[i]];
3590     }
3591     ierr = PetscFree(larray);CHKERRQ(ierr);
3592     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3593     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3594     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3595 
3596     /* set coloring for off-diagonal portion */
3597     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3598     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3599     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3600     for (i=0; i<a->B->cmap.n; i++) {
3601       colors[i] = coloring->colors[larray[i]];
3602     }
3603     ierr = PetscFree(larray);CHKERRQ(ierr);
3604     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3605     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3606     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3607   } else {
3608     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3609   }
3610 
3611   PetscFunctionReturn(0);
3612 }
3613 
3614 #if defined(PETSC_HAVE_ADIC)
3615 #undef __FUNCT__
3616 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3617 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3618 {
3619   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3620   PetscErrorCode ierr;
3621 
3622   PetscFunctionBegin;
3623   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3624   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3625   PetscFunctionReturn(0);
3626 }
3627 #endif
3628 
3629 #undef __FUNCT__
3630 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3631 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3632 {
3633   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3634   PetscErrorCode ierr;
3635 
3636   PetscFunctionBegin;
3637   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3638   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3639   PetscFunctionReturn(0);
3640 }
3641 
3642 #undef __FUNCT__
3643 #define __FUNCT__ "MatMerge"
3644 /*@
3645       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
3646                  matrices from each processor
3647 
3648     Collective on MPI_Comm
3649 
3650    Input Parameters:
3651 +    comm - the communicator the parallel matrix will live on
3652 .    inmat - the input sequential matrices
3653 .    n - number of local columns (or PETSC_DECIDE)
3654 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3655 
3656    Output Parameter:
3657 .    outmat - the parallel matrix generated
3658 
3659     Level: advanced
3660 
3661    Notes: The number of columns of the matrix in EACH processor MUST be the same.
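
   A minimal usage sketch (inmat is this process's sequential matrix; note that
   MatMerge() destroys inmat once its values have been copied):

.vb
     Mat outmat;
     MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve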
3662 
3663 @*/
3664 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3665 {
3666   PetscErrorCode ierr;
3667   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3668   PetscInt       *indx;
3669   PetscScalar    *values;
3670 
3671   PetscFunctionBegin;
3672   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3673   if (scall == MAT_INITIAL_MATRIX){
3674     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3675     if (n == PETSC_DECIDE){
3676       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3677     }
3678     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3679     rstart -= m;
3680 
3681     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3682     for (i=0;i<m;i++) {
3683       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3684       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3685       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3686     }
3687     /* This routine will ONLY return MPIAIJ type matrix */
3688     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3689     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3690     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3691     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3692     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3693 
3694   } else if (scall == MAT_REUSE_MATRIX){
3695     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3696   } else {
3697     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3698   }
3699 
3700   for (i=0;i<m;i++) {
3701     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3702     Ii    = i + rstart;
3703     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3704     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3705   }
3706   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3707   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3708   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3709 
3710   PetscFunctionReturn(0);
3711 }
3712 
3713 #undef __FUNCT__
3714 #define __FUNCT__ "MatFileSplit"
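/*
    Writes each process's local rows of A, as a sequential m x N SeqAIJ matrix,
  to a separate binary file named <outfile>.<rank>.
*/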
3715 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3716 {
3717   PetscErrorCode    ierr;
3718   PetscMPIInt       rank;
3719   PetscInt          m,N,i,rstart,nnz;
3720   size_t            len;
3721   const PetscInt    *indx;
3722   PetscViewer       out;
3723   char              *name;
3724   Mat               B;
3725   const PetscScalar *values;
3726 
3727   PetscFunctionBegin;
3728   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3729   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3730   /* Should this be the type of the diagonal block of A? */
3731   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3732   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3733   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3734   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3735   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3736   for (i=0;i<m;i++) {
3737     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3738     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3739     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3740   }
3741   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3742   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3743 
3744   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3745   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3746   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3747   sprintf(name,"%s.%d",outfile,rank);
3748   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3749   ierr = PetscFree(name);CHKERRQ(ierr);
3750   ierr = MatView(B,out);CHKERRQ(ierr);
3751   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3752   ierr = MatDestroy(B);CHKERRQ(ierr);
3753   PetscFunctionReturn(0);
3754 }
3755 
3756 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3757 #undef __FUNCT__
3758 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
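/*
    Frees the Mat_Merge_SeqsToMPI context attached by MatMerge_SeqsToMPISymbolic(),
  if present, and then destroys the underlying MPIAIJ matrix.
*/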
3759 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3760 {
3761   PetscErrorCode       ierr;
3762   Mat_Merge_SeqsToMPI  *merge;
3763   PetscContainer       container;
3764 
3765   PetscFunctionBegin;
3766   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3767   if (container) {
3768     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3769     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3770     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3771     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3772     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3773     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3774     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3775     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3776     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3777     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3778     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3779     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3780 
3781     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3782     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3783   }
3784   ierr = PetscFree(merge);CHKERRQ(ierr);
3785 
3786   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3787   PetscFunctionReturn(0);
3788 }
3789 
3790 #include "src/mat/utils/freespace.h"
3791 #include "petscbt.h"
3792 
3793 #undef __FUNCT__
3794 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3795 /*@C
3796       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
3797                  matrices from each processor
3798 
3799     Collective on MPI_Comm
3800 
3801    Input Parameters:
3802 +    comm - the communicator the parallel matrix will live on
3803 .    seqmat - the input sequential matrices
3804 .    m - number of local rows (or PETSC_DECIDE)
3805 .    n - number of local columns (or PETSC_DECIDE)
3806 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3807 
3808    Output Parameter:
3809 .    mpimat - the parallel matrix generated
3810 
3811     Level: advanced
3812 
3813    Notes:
3814      The dimensions of the sequential matrix in each processor MUST be the same.
3815      The input seqmat is placed in the container "Mat_Merge_SeqsToMPI", and will be
3816      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
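
   A sketch of the symbolic/numeric calling sequence (seqmat is this process's
   sequential matrix; m and n are the desired local sizes or PETSC_DECIDE):

.vb
     Mat mpimat;
     MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);
     MatMerge_SeqsToMPINumeric(seqmat,mpimat);
.ve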
3817 @*/
3818 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3819 {
3820   PetscErrorCode       ierr;
3821   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3822   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3823   PetscMPIInt          size,rank,taga,*len_s;
3824   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3825   PetscInt             proc,m;
3826   PetscInt             **buf_ri,**buf_rj;
3827   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3828   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3829   MPI_Request          *s_waits,*r_waits;
3830   MPI_Status           *status;
3831   MatScalar            *aa=a->a;
3832   MatScalar            **abuf_r,*ba_i;
3833   Mat_Merge_SeqsToMPI  *merge;
3834   PetscContainer       container;
3835 
3836   PetscFunctionBegin;
3837   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3838 
3839   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3840   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3841 
3842   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3843   if (container) {
3844     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3845   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix was not created by MatMerge_SeqsToMPISymbolic()");
3846   bi     = merge->bi;
3847   bj     = merge->bj;
3848   buf_ri = merge->buf_ri;
3849   buf_rj = merge->buf_rj;
3850 
3851   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3852   owners = merge->rowmap.range;
3853   len_s  = merge->len_s;
3854 
3855   /* send and recv matrix values */
3856   /*-----------------------------*/
3857   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3858   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3859 
3860   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3861   for (proc=0,k=0; proc<size; proc++){
3862     if (!len_s[proc]) continue;
3863     i = owners[proc];
3864     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3865     k++;
3866   }
3867 
3868   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3869   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3870   ierr = PetscFree(status);CHKERRQ(ierr);
3871 
3872   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3873   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3874 
3875   /* insert mat values of mpimat */
3876   /*----------------------------*/
3877   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
3878   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3879   nextrow = buf_ri_k + merge->nrecv;
3880   nextai  = nextrow + merge->nrecv;
3881 
3882   for (k=0; k<merge->nrecv; k++){
3883     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3884     nrows = *(buf_ri_k[k]);
3885     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3886     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3887   }
3888 
3889   /* set values of ba */
3890   m = merge->rowmap.n;
3891   for (i=0; i<m; i++) {
3892     arow = owners[rank] + i;
3893     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3894     bnzi = bi[i+1] - bi[i];
3895     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3896 
3897     /* add local non-zero vals of this proc's seqmat into ba */
3898     anzi = ai[arow+1] - ai[arow];
3899     aj   = a->j + ai[arow];
3900     aa   = a->a + ai[arow];
3901     nextaj = 0;
3902     for (j=0; nextaj<anzi; j++){
3903       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3904         ba_i[j] += aa[nextaj++];
3905       }
3906     }
3907 
3908     /* add received vals into ba */
3909     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3910       /* i-th row */
3911       if (i == *nextrow[k]) {
3912         anzi = *(nextai[k]+1) - *nextai[k];
3913         aj   = buf_rj[k] + *(nextai[k]);
3914         aa   = abuf_r[k] + *(nextai[k]);
3915         nextaj = 0;
3916         for (j=0; nextaj<anzi; j++){
3917           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3918             ba_i[j] += aa[nextaj++];
3919           }
3920         }
3921         nextrow[k]++; nextai[k]++;
3922       }
3923     }
3924     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3925   }
3926   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3927   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3928 
3929   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3930   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3931   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3932   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3933   PetscFunctionReturn(0);
3934 }
3935 
3936 #undef __FUNCT__
3937 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3938 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3939 {
3940   PetscErrorCode       ierr;
3941   Mat                  B_mpi;
3942   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3943   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3944   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3945   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3946   PetscInt             len,proc,*dnz,*onz;
3947   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3948   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3949   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3950   MPI_Status           *status;
3951   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3952   PetscBT              lnkbt;
3953   Mat_Merge_SeqsToMPI  *merge;
3954   PetscContainer       container;
3955 
3956   PetscFunctionBegin;
3957   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3958 
3959   /* make sure it is a PETSc comm */
3960   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3961   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3962   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3963 
3964   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3965   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3966 
3967   /* determine row ownership */
3968   /*---------------------------------------------------------*/
3969   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3970   merge->rowmap.n = m;
3971   merge->rowmap.N = M;
3972   merge->rowmap.bs = 1;
3973   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3974   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3975   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3976 
3977   m      = merge->rowmap.n;
3978   M      = merge->rowmap.N;
3979   owners = merge->rowmap.range;
3980 
3981   /* determine the number of messages to send, their lengths */
3982   /*---------------------------------------------------------*/
3983   len_s  = merge->len_s;
3984 
3985   len = 0;  /* length of buf_si[] */
3986   merge->nsend = 0;
3987   for (proc=0; proc<size; proc++){
3988     len_si[proc] = 0;
3989     if (proc == rank){
3990       len_s[proc] = 0;
3991     } else {
3992       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3993       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
3994     }
3995     if (len_s[proc]) {
3996       merge->nsend++;
3997       nrows = 0;
3998       for (i=owners[proc]; i<owners[proc+1]; i++){
3999         if (ai[i+1] > ai[i]) nrows++;
4000       }
4001       len_si[proc] = 2*(nrows+1);
4002       len += len_si[proc];
4003     }
4004   }
4005 
4006   /* determine the number and length of messages to receive for ij-structure */
4007   /*-------------------------------------------------------------------------*/
4008   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4009   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4010 
4011   /* post the Irecv of j-structure */
4012   /*-------------------------------*/
4013   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4014   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4015 
4016   /* post the Isend of j-structure */
4017   /*--------------------------------*/
4018   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4019   sj_waits = si_waits + merge->nsend;
4020 
4021   for (proc=0, k=0; proc<size; proc++){
4022     if (!len_s[proc]) continue;
4023     i = owners[proc];
4024     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4025     k++;
4026   }
4027 
4028   /* receives and sends of j-structure are complete */
4029   /*------------------------------------------------*/
4030   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4031   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4032 
4033   /* send and recv i-structure */
4034   /*---------------------------*/
4035   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4036   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4037 
4038   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4039   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4040   for (proc=0,k=0; proc<size; proc++){
4041     if (!len_s[proc]) continue;
4042     /* form outgoing message for i-structure:
4043          buf_si[0]:                 nrows to be sent
4044                [1:nrows]:           row index (local to the receiving process)
4045                [nrows+1:2*nrows+1]: i-structure index
4046     */
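    /* Worked example (illustrative): if the rows destined for [proc] are its
       local rows 2 and 5, holding 3 and 4 nonzeros respectively, then
       nrows = 2, len_si[proc] = 6, and the message is
           buf_si = {2, 2, 5, 0, 3, 7}
       i.e. nrows, the two local row indices, then the i-structure {0,3,7}. */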
4047     /*-------------------------------------------*/
4048     nrows = len_si[proc]/2 - 1;
4049     buf_si_i    = buf_si + nrows+1;
4050     buf_si[0]   = nrows;
4051     buf_si_i[0] = 0;
4052     nrows = 0;
4053     for (i=owners[proc]; i<owners[proc+1]; i++){
4054       anzi = ai[i+1] - ai[i];
4055       if (anzi) {
4056         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4057         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4058         nrows++;
4059       }
4060     }
4061     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4062     k++;
4063     buf_si += len_si[proc];
4064   }
4065 
4066   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4067   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4068 
4069   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4070   for (i=0; i<merge->nrecv; i++){
4071     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4072   }
4073 
4074   ierr = PetscFree(len_si);CHKERRQ(ierr);
4075   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4076   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4077   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4078   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4079   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4080   ierr = PetscFree(status);CHKERRQ(ierr);
4081 
4082   /* compute a local seq matrix in each processor */
4083   /*----------------------------------------------*/
4084   /* allocate bi array and free space for accumulating nonzero column info */
4085   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4086   bi[0] = 0;
4087 
4088   /* create and initialize a linked list */
4089   nlnk = N+1;
4090   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4091 
4092   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4094   len  = ai[owners[rank+1]] - ai[owners[rank]]; /* num of local nonzeros of seqmat */
4095   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4096   current_space = free_space;
4097 
4098   /* determine symbolic info for each local row */
4099   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt*),&buf_ri_k);CHKERRQ(ierr);
4100   nextrow = buf_ri_k + merge->nrecv;
4101   nextai  = nextrow + merge->nrecv;
4102   for (k=0; k<merge->nrecv; k++){
4103     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4104     nrows = *buf_ri_k[k];
4105     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4106     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4107   }
4108 
4109   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4110   len = 0;
4111   for (i=0;i<m;i++) {
4112     bnzi   = 0;
4113     /* add local non-zero cols of this proc's seqmat into lnk */
4114     arow   = owners[rank] + i;
4115     anzi   = ai[arow+1] - ai[arow];
4116     aj     = a->j + ai[arow];
4117     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4118     bnzi += nlnk;
4119     /* add received col data into lnk */
4120     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4121       if (i == *nextrow[k]) { /* i-th row */
4122         anzi = *(nextai[k]+1) - *nextai[k];
4123         aj   = buf_rj[k] + *nextai[k];
4124         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4125         bnzi += nlnk;
4126         nextrow[k]++; nextai[k]++;
4127       }
4128     }
4129     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4130 
4131     /* if free space is not available, make more free space */
4132     if (current_space->local_remaining<bnzi) {
4133       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
4134       nspacedouble++;
4135     }
4136     /* copy data into free space, then initialize lnk */
4137     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4138     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4139 
4140     current_space->array           += bnzi;
4141     current_space->local_used      += bnzi;
4142     current_space->local_remaining -= bnzi;
4143 
4144     bi[i+1] = bi[i] + bnzi;
4145   }
4146 
4147   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4148 
4149   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4150   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4151   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4152 
4153   /* create symbolic parallel matrix B_mpi */
4154   /*---------------------------------------*/
4155   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4156   if (n==PETSC_DECIDE) {
4157     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4158   } else {
4159     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4160   }
4161   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4162   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4163   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4164 
4165   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4166   B_mpi->assembled     = PETSC_FALSE;
4167   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4168   merge->bi            = bi;
4169   merge->bj            = bj;
4170   merge->buf_ri        = buf_ri;
4171   merge->buf_rj        = buf_rj;
4172   merge->coi           = PETSC_NULL;
4173   merge->coj           = PETSC_NULL;
4174   merge->owners_co     = PETSC_NULL;
4175 
4176   /* attach the supporting struct to B_mpi for reuse */
4177   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4178   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4179   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4180   *mpimat = B_mpi;
4181 
4182   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4183   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4184   PetscFunctionReturn(0);
4185 }
4186 
4187 #undef __FUNCT__
4188 #define __FUNCT__ "MatMerge_SeqsToMPI"
4189 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4190 {
4191   PetscErrorCode   ierr;
4192 
4193   PetscFunctionBegin;
4194   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4195   if (scall == MAT_INITIAL_MATRIX){
4196     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4197   }
4198   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4199   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4200   PetscFunctionReturn(0);
4201 }
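/*
   A hedged usage sketch (illustrative, not part of the library): merge the
   per-process SeqAIJ pieces into one parallel matrix, then update the values
   when seqmat changes numerically but keeps its nonzero pattern.

     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... numerical values of seqmat change, same nonzero pattern ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/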
4202 
4203 #undef __FUNCT__
4204 #define __FUNCT__ "MatGetLocalMat"
4205 /*@
4206      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel MPIAIJ matrix
4207 
4208     Not Collective
4209 
4210    Input Parameters:
4211 +    A - the matrix
4212 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4213 
4214    Output Parameter:
4215 .    A_loc - the local sequential matrix generated
4216 
4217     Level: developer
4218 
4219 @*/
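/*
   A hedged usage sketch (illustrative): with MAT_REUSE_MATRIX the routine
   overwrites the numerical values of a previously generated A_loc in place.

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... values of A change, same nonzero pattern ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/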
4220 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4221 {
4222   PetscErrorCode  ierr;
4223   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4224   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4225   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4226   MatScalar       *aa=a->a,*ba=b->a,*cam;
4227   PetscScalar     *ca;
4228   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
4229   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4230 
4231   PetscFunctionBegin;
4232   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4233   if (scall == MAT_INITIAL_MATRIX){
4234     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4235     ci[0] = 0;
4236     for (i=0; i<am; i++){
4237       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4238     }
4239     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4240     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4241     k = 0;
4242     for (i=0; i<am; i++) {
4243       ncols_o = bi[i+1] - bi[i];
4244       ncols_d = ai[i+1] - ai[i];
4245       /* off-diagonal portion of A: global cols < cstart */
4246       for (jo=0; jo<ncols_o; jo++) {
4247         col = cmap[*bj];
4248         if (col >= cstart) break;
4249         cj[k]   = col; bj++;
4250         ca[k++] = *ba++;
4251       }
4252       /* diagonal portion of A */
4253       for (j=0; j<ncols_d; j++) {
4254         cj[k]   = cstart + *aj++;
4255         ca[k++] = *aa++;
4256       }
4257       /* off-diagonal portion of A: remaining global cols (>= cend) */
4258       for (j=jo; j<ncols_o; j++) {
4259         cj[k]   = cmap[*bj++];
4260         ca[k++] = *ba++;
4261       }
4262     }
4263     /* put together the new matrix */
4264     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4265     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4266     /* Since these are PETSc arrays, change flags to free them as necessary. */
4267     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4268     mat->free_a  = PETSC_TRUE;
4269     mat->free_ij = PETSC_TRUE;
4270     mat->nonew   = 0;
4271   } else if (scall == MAT_REUSE_MATRIX){
4272     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4273     ci = mat->i; cj = mat->j; cam = mat->a;
4274     for (i=0; i<am; i++) {
4275       /* off-diagonal portion of A: global cols < cstart */
4276       ncols_o = bi[i+1] - bi[i];
4277       for (jo=0; jo<ncols_o; jo++) {
4278         col = cmap[*bj];
4279         if (col >= cstart) break;
4280         *cam++ = *ba++; bj++;
4281       }
4282       /* diagonal portion of A */
4283       ncols_d = ai[i+1] - ai[i];
4284       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4285       /* off-diagonal portion of A: remaining global cols (>= cend) */
4286       for (j=jo; j<ncols_o; j++) {
4287         *cam++ = *ba++; bj++;
4288       }
4289     }
4290   } else {
4291     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4292   }
4293 
4294   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4295   PetscFunctionReturn(0);
4296 }
4297 
4298 #undef __FUNCT__
4299 #define __FUNCT__ "MatGetLocalMatCondensed"
4300 /*@C
4301      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and the NON-ZERO columns of a parallel MPIAIJ matrix
4302 
4303     Not Collective
4304 
4305    Input Parameters:
4306 +    A - the matrix
4307 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4308 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4309 
4310    Output Parameter:
4311 .    A_loc - the local sequential matrix generated
4312 
4313     Level: developer
4314 
4315 @*/
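/*
   A hedged usage sketch (illustrative): passing PETSC_NULL for row and col
   lets the routine build the index sets from A's layout and garray itself.

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ... use A_loc ...
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/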
4316 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4317 {
4318   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4319   PetscErrorCode    ierr;
4320   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4321   IS                isrowa,iscola;
4322   Mat               *aloc;
4323 
4324   PetscFunctionBegin;
4325   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4326   if (!row){
4327     start = A->rmap.rstart; end = A->rmap.rend;
4328     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4329   } else {
4330     isrowa = *row;
4331   }
4332   if (!col){
4333     start = A->cmap.rstart;
4334     cmap  = a->garray;
4335     nzA   = a->A->cmap.n;
4336     nzB   = a->B->cmap.n;
4337     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4338     ncols = 0;
4339     for (i=0; i<nzB; i++) {
4340       if (cmap[i] < start) idx[ncols++] = cmap[i];
4341       else break;
4342     }
4343     imark = i;
4344     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4345     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4346     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4347     ierr = PetscFree(idx);CHKERRQ(ierr);
4348   } else {
4349     iscola = *col;
4350   }
4351   if (scall != MAT_INITIAL_MATRIX){
4352     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4353     aloc[0] = *A_loc;
4354   }
4355   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4356   *A_loc = aloc[0];
4357   ierr = PetscFree(aloc);CHKERRQ(ierr);
4358   if (!row){
4359     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4360   }
4361   if (!col){
4362     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4363   }
4364   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4365   PetscFunctionReturn(0);
4366 }
4367 
4368 #undef __FUNCT__
4369 #define __FUNCT__ "MatGetBrowsOfAcols"
4370 /*@C
4371     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4372 
4373     Collective on Mat
4374 
4375    Input Parameters:
4376 +    A,B - the matrices in mpiaij format
4377 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4378 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4379 
4380   Output Parameters:
4381 +    rowb, colb - index sets of rows and columns of B to extract
4382 .    brstart - row index of B_seq at which B's own local rows begin (the next B->rmap.n rows of B_seq come from B's local rows)
4383 -    B_seq - the sequential matrix generated
4384 
4385     Level: developer
4386 
4387 @*/
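/*
   A hedged usage sketch (illustrative): gather the needed rows of B once,
   then reuse the index sets when only B's numerical values have changed.

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... B's values change, same nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/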
4388 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4389 {
4390   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4391   PetscErrorCode    ierr;
4392   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4393   IS                isrowb,iscolb;
4394   Mat               *bseq;
4395 
4396   PetscFunctionBegin;
4397   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4398     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4399   }
4400   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4401 
4402   if (scall == MAT_INITIAL_MATRIX){
4403     start = A->cmap.rstart;
4404     cmap  = a->garray;
4405     nzA   = a->A->cmap.n;
4406     nzB   = a->B->cmap.n;
4407     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4408     ncols = 0;
4409     for (i=0; i<nzB; i++) {  /* row < local row index */
4410       if (cmap[i] < start) idx[ncols++] = cmap[i];
4411       else break;
4412     }
4413     imark = i;
4414     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4415     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4416     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4417     ierr = PetscFree(idx);CHKERRQ(ierr);
4418     *brstart = imark;
4419     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4420   } else {
4421     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4422     isrowb = *rowb; iscolb = *colb;
4423     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4424     bseq[0] = *B_seq;
4425   }
4426   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4427   *B_seq = bseq[0];
4428   ierr = PetscFree(bseq);CHKERRQ(ierr);
4429   if (!rowb){
4430     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4431   } else {
4432     *rowb = isrowb;
4433   }
4434   if (!colb){
4435     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4436   } else {
4437     *colb = iscolb;
4438   }
4439   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4440   PetscFunctionReturn(0);
4441 }
4442 
4443 #undef __FUNCT__
4444 #define __FUNCT__ "MatGetBrowsOfAoCols"
4445 /*@C
4446     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4447     of the OFF-DIAGONAL portion of the local part of A
4448 
4449     Collective on Mat
4450 
4451    Input Parameters:
4452 +    A,B - the matrices in mpiaij format
4453 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4454 .    startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4455 -    bufa_ptr - buffer for sending matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4456 
4457    Output Parameter:
4458 .    B_oth - the sequential matrix generated
4459 
4460     Level: developer
4461 
4462 @*/
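/*
   A hedged usage sketch (illustrative): the startsj and bufa buffers returned
   by the first call allow the communication pattern to be reused.

     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ... B's values change, same nonzero pattern ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree(startsj);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/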
4463 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4464 {
4465   VecScatter_MPI_General *gen_to,*gen_from;
4466   PetscErrorCode         ierr;
4467   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4468   Mat_SeqAIJ             *b_oth;
4469   VecScatter             ctx=a->Mvctx;
4470   MPI_Comm               comm=((PetscObject)ctx)->comm;
4471   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4472   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4473   PetscScalar            *rvalues,*svalues;
4474   MatScalar              *b_otha,*bufa,*bufA;
4475   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4476   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4477   MPI_Status             *sstatus,rstatus;
4478   PetscMPIInt            jj;
4479   PetscInt               *cols,sbs,rbs;
4480   PetscScalar            *vals;
4481 
4482   PetscFunctionBegin;
4483   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4484     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4485   }
4486   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4487   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4488 
4489   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4490   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4491   rvalues  = gen_from->values; /* scatter buffer, reused here to hold incoming row lengths */
4492   svalues  = gen_to->values;   /* scatter buffer, reused here to hold outgoing row lengths */
4493   nrecvs   = gen_from->n;
4494   nsends   = gen_to->n;
4495 
4496   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4497   srow     = gen_to->indices;   /* local row index to be sent */
4498   sstarts  = gen_to->starts;
4499   sprocs   = gen_to->procs;
4500   sstatus  = gen_to->sstatus;
4501   sbs      = gen_to->bs;
4502   rstarts  = gen_from->starts;
4503   rprocs   = gen_from->procs;
4504   rbs      = gen_from->bs;
4505 
4506   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4507   if (scall == MAT_INITIAL_MATRIX){
4508     /* i-array */
4509     /*---------*/
4510     /*  post receives */
4511     for (i=0; i<nrecvs; i++){
4512       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4513       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4514       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4515     }
4516 
4517     /* pack the outgoing message */
4518     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4519     rstartsj = sstartsj + nsends +1;
4520     sstartsj[0] = 0;  rstartsj[0] = 0;
4521     len = 0; /* total length of j or a array to be sent */
4522     k = 0;
4523     for (i=0; i<nsends; i++){
4524       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4525       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4526       for (j=0; j<nrows; j++) {
4527         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4528         for (l=0; l<sbs; l++){
4529           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4530           rowlen[j*sbs+l] = ncols;
4531           len += ncols;
4532           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4533         }
4534         k++;
4535       }
4536       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4537       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4538     }
4539     /* recvs and sends of i-array are completed */
4540     i = nrecvs;
4541     while (i--) {
4542       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4543     }
4544     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4545 
4546     /* allocate buffers for sending j and a arrays */
4547     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4548     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4549 
4550     /* create i-array of B_oth */
4551     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4552     b_othi[0] = 0;
4553     len = 0; /* total length of j or a array to be received */
4554     k = 0;
4555     for (i=0; i<nrecvs; i++){
4556       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4557       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4558       for (j=0; j<nrows; j++) {
4559         b_othi[k+1] = b_othi[k] + rowlen[j];
4560         len += rowlen[j]; k++;
4561       }
4562       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4563     }
4564 
4565     /* allocate space for j and a arrays of B_oth */
4566     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4567     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4568 
4569     /* j-array */
4570     /*---------*/
4571     /*  post receives of j-array */
4572     for (i=0; i<nrecvs; i++){
4573       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4574       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4575     }
4576 
4577     /* pack the outgoing message j-array */
4578     k = 0;
4579     for (i=0; i<nsends; i++){
4580       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4581       bufJ = bufj+sstartsj[i];
4582       for (j=0; j<nrows; j++) {
4583         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4584         for (ll=0; ll<sbs; ll++){
4585           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4586           for (l=0; l<ncols; l++){
4587             *bufJ++ = cols[l];
4588           }
4589           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4590         }
4591       }
4592       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4593     }
4594 
4595     /* recvs and sends of j-array are completed */
4596     i = nrecvs;
4597     while (i--) {
4598       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4599     }
4600     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4601   } else if (scall == MAT_REUSE_MATRIX){
4602     sstartsj = *startsj;
4603     rstartsj = sstartsj + nsends +1;
4604     bufa     = *bufa_ptr;
4605     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4606     b_otha   = b_oth->a;
4607   } else {
4608     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4609   }
4610 
4611   /* a-array */
4612   /*---------*/
4613   /*  post receives of a-array */
4614   for (i=0; i<nrecvs; i++){
4615     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4616     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4617   }
4618 
4619   /* pack the outgoing message a-array */
4620   k = 0;
4621   for (i=0; i<nsends; i++){
4622     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4623     bufA = bufa+sstartsj[i];
4624     for (j=0; j<nrows; j++) {
4625       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4626       for (ll=0; ll<sbs; ll++){
4627         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4628         for (l=0; l<ncols; l++){
4629           *bufA++ = vals[l];
4630         }
4631         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4632       }
4633     }
4634     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4635   }
4636   /* recvs and sends of a-array are completed */
4637   i = nrecvs;
4638   while (i--) {
4639     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4640   }
4641   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4642   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4643 
4644   if (scall == MAT_INITIAL_MATRIX){
4645     /* put together the new matrix */
4646     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4647 
4648     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4649     /* Since these are PETSc arrays, change flags to free them as necessary. */
4650     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4651     b_oth->free_a  = PETSC_TRUE;
4652     b_oth->free_ij = PETSC_TRUE;
4653     b_oth->nonew   = 0;
4654 
4655     ierr = PetscFree(bufj);CHKERRQ(ierr);
4656     if (!startsj || !bufa_ptr){
4657       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4658       ierr = PetscFree(bufa);CHKERRQ(ierr); /* caller did not ask to keep the value buffer */
4659     } else {
4660       *startsj  = sstartsj;
4661       *bufa_ptr = bufa;
4662     }
4663   }
4664   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4665   PetscFunctionReturn(0);
4666 }
4667 
4668 #undef __FUNCT__
4669 #define __FUNCT__ "MatGetCommunicationStructs"
4670 /*@C
4671   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4672 
4673   Not Collective
4674 
4675   Input Parameter:
4676 . A - The matrix in mpiaij format
4677 
4678   Output Parameters:
4679 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4680 . colmap - A map from global column index to local index into lvec
4681 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4682 
4683   Level: developer
4684 
4685 @*/
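/*
   A hedged usage sketch (illustrative, for the configuration where colmap is
   a PetscInt array rather than a PetscTable):

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);

   The returned objects are references owned by A; do not destroy them.
*/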
4686 #if defined (PETSC_USE_CTABLE)
4687 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4688 #else
4689 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4690 #endif
4691 {
4692   Mat_MPIAIJ *a;
4693 
4694   PetscFunctionBegin;
4695   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4696   PetscValidPointer(lvec, 2);
4697   PetscValidPointer(colmap, 3);
4698   PetscValidPointer(multScatter, 4);
4699   a = (Mat_MPIAIJ *) A->data;
4700   if (lvec) *lvec = a->lvec;
4701   if (colmap) *colmap = a->colmap;
4702   if (multScatter) *multScatter = a->Mvctx;
4703   PetscFunctionReturn(0);
4704 }
4705 
4706 EXTERN_C_BEGIN
4707 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4708 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4709 EXTERN_C_END
4710 
4711 #include "src/mat/impls/dense/mpi/mpidense.h"
4712 
4713 #undef __FUNCT__
4714 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4715 /*
4716     Computes C = A*B as (B'*A')' since a direct MPIDense*MPIAIJ product is not available
4717 
4718                n                       p                          p
4719         (              )       (              )         (                  )
4720       m (      A       )  *  n (       B      )   =   m (         C        )
4721         (              )       (              )         (                  )
4722 
4723 */
4724 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4725 {
4726   PetscErrorCode     ierr;
4729   Mat_MPIDense       *sub_c = (Mat_MPIDense*)C->data;
4734   Mat                At,Bt,Ct;
4735 
4736   PetscFunctionBegin;
4737   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4738   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4739   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4740   ierr = MatDestroy(At);CHKERRQ(ierr);
4741   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4742   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(Ct);CHKERRQ(ierr); /* Ct has been copied into C and is no longer needed */
4743 
4744   C->assembled = PETSC_TRUE;
4745   ierr = MatAssemblyBegin(sub_c->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4746   ierr = MatAssemblyEnd(sub_c->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4747   if (!C->was_assembled) {
4748     ierr = MatSetUpMultiply_MPIDense(C);CHKERRQ(ierr);
4749   }
4750   PetscFunctionReturn(0);
4751 }
4752 
4753 #undef __FUNCT__
4754 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4755 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4756 {
4757   PetscErrorCode ierr;
4758   PetscInt       m=A->rmap.n,n=B->cmap.n;
4759   Mat            Cmat;
4760 
4761   PetscFunctionBegin;
4762   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %D != B->rmap.n %D",A->cmap.n,B->rmap.n);
4763   ierr = MatCreate(A->hdr.comm,&Cmat);CHKERRQ(ierr);
4764   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4765   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4766   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4767   Cmat->assembled = PETSC_TRUE;
4768   *C              = Cmat;
4769   PetscFunctionReturn(0);
4770 }
4771 
4772 /* ----------------------------------------------------------------*/
4773 #undef __FUNCT__
4774 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4775 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4776 {
4777   PetscErrorCode ierr;
4778 
4779   PetscFunctionBegin;
4780   if (scall == MAT_INITIAL_MATRIX){
4781     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4782   }
4783   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4784   PetscFunctionReturn(0);
4785 }
4786 
4787 /*MC
4788    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4789 
4790    Options Database Keys:
4791 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4792 
4793   Level: beginner
4794 
4795 .seealso: MatCreateMPIAIJ()
4796 M*/
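/*
   A hedged usage sketch (illustrative): selecting this type at runtime, where
   -mat_type mpiaij is honored by MatSetFromOptions() and the preallocation
   numbers 5 and 2 are placeholders.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/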
4797 
4798 EXTERN_C_BEGIN
4799 #undef __FUNCT__
4800 #define __FUNCT__ "MatCreate_MPIAIJ"
4801 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4802 {
4803   Mat_MPIAIJ     *b;
4804   PetscErrorCode ierr;
4805   PetscMPIInt    size;
4806 
4807   PetscFunctionBegin;
4808   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4809 
4810   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4811   B->data         = (void*)b;
4812   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4813   B->factor       = 0;
4814   B->rmap.bs      = 1;
4815   B->assembled    = PETSC_FALSE;
4816   B->mapping      = 0;
4817 
4818   B->insertmode      = NOT_SET_VALUES;
4819   b->size            = size;
4820   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4821 
4822   /* build cache for off array entries formed */
4823   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4824   b->donotstash  = PETSC_FALSE;
4825   b->colmap      = 0;
4826   b->garray      = 0;
4827   b->roworiented = PETSC_TRUE;
4828 
4829   /* stuff used for matrix vector multiply */
4830   b->lvec      = PETSC_NULL;
4831   b->Mvctx     = PETSC_NULL;
4832 
4833   /* stuff for MatGetRow() */
4834   b->rowindices   = 0;
4835   b->rowvalues    = 0;
4836   b->getrowactive = PETSC_FALSE;
4837 
4838 
4839   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4840                                      "MatStoreValues_MPIAIJ",
4841                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4842   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4843                                      "MatRetrieveValues_MPIAIJ",
4844                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4845   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4846 				     "MatGetDiagonalBlock_MPIAIJ",
4847                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4848   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4849 				     "MatIsTranspose_MPIAIJ",
4850 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4851   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4852 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4853 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4854   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4855 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4856 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4857   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4858 				     "MatDiagonalScaleLocal_MPIAIJ",
4859 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4860   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4861                                      "MatConvert_MPIAIJ_MPICSRPERM",
4862                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4863   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4864                                      "MatConvert_MPIAIJ_MPICRL",
4865                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4866   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
4867                                      "MatMatMult_MPIDense_MPIAIJ",
4868                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
4869   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
4870                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
4871                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
4872   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
4873                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
4874                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
4875   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4876   PetscFunctionReturn(0);
4877 }
4878 EXTERN_C_END
4879 
4880 #undef __FUNCT__
4881 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4882 /*@
4883      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4884          and "off-diagonal" part of the matrix in CSR format.
4885 
4886    Collective on MPI_Comm
4887 
4888    Input Parameters:
4889 +  comm - MPI communicator
4890 .  m - number of local rows (Cannot be PETSC_DECIDE)
4891 .  n - number of local columns; this should be the same as the local size used in creating the
4892        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4893        calculated if N is given). For square matrices n is almost always m.
4894 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4895 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4896 .   i - row indices for "diagonal" portion of matrix
4897 .   j - column indices
4898 .   a - matrix values
4899 .   oi - row indices for "off-diagonal" portion of matrix
4900 .   oj - column indices
4901 -   oa - matrix values
4902 
4903    Output Parameter:
4904 .   mat - the matrix
4905 
4906    Level: advanced
4907 
4908    Notes:
4909        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4910 
4911        The i and j indices are 0 based
4912 
4913        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4914 
4915 
4916 .keywords: matrix, aij, compressed row, sparse, parallel
4917 
4918 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4919           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4920 @*/
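/*
   A hedged sketch with illustrative values: each process contributes two local
   rows whose "diagonal" block is a 2x2 diagonal matrix and whose "off-diagonal"
   block is empty.  Note that j holds column indices local to this process's
   diagonal block while oj would hold global column indices.

     PetscInt    i[]  = {0,1,2},j[] = {0,1};
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,0,0},*oj = PETSC_NULL;
     PetscScalar *oa  = PETSC_NULL;
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/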
4921 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4922 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4923 {
4924   PetscErrorCode ierr;
4925   Mat_MPIAIJ     *maij;
4926 
4927   PetscFunctionBegin;
4928   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4929   if (i[0]) {
4930     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4931   }
4932   if (oi[0]) {
4933     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4934   }
4935   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4936   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4937   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4938   maij = (Mat_MPIAIJ*) (*mat)->data;
4939   maij->donotstash     = PETSC_TRUE;
4940   (*mat)->preallocated = PETSC_TRUE;
4941 
4942   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4943   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4944   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4945 
4946   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4947   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4948 
4949   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4950   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4951   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4952   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4953 
4954   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4955   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4956   PetscFunctionReturn(0);
4957 }
4958 
4959 /*
4960     Special version for direct calls from Fortran
4961 */
4962 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4963 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4964 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4965 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4966 #endif
4967 
4968 /* Redefine these macros so they can be used in a void function */
4969 #undef CHKERRQ
4970 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4971 #undef SETERRQ2
4972 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4973 #undef SETERRQ
4974 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4975 
4976 EXTERN_C_BEGIN
4977 #undef __FUNCT__
4978 #define __FUNCT__ "matsetvaluesmpiaij_"
4979 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4980 {
4981   Mat             mat = *mmat;
4982   PetscInt        m = *mm, n = *mn;
4983   InsertMode      addv = *maddv;
4984   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
4985   PetscScalar     value;
4986   PetscErrorCode  ierr;
4987 
4988   MatPreallocated(mat);
4989   if (mat->insertmode == NOT_SET_VALUES) {
4990     mat->insertmode = addv;
4991   }
4992 #if defined(PETSC_USE_DEBUG)
4993   else if (mat->insertmode != addv) {
4994     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4995   }
4996 #endif
4997   {
4998   PetscInt        i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4999   PetscInt        cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
5000   PetscTruth      roworiented = aij->roworiented;
5001 
5002   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
5003   Mat             A = aij->A;
5004   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5005   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5006   MatScalar       *aa = a->a;
5007   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5008   Mat             B = aij->B;
5009   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5010   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
5011   MatScalar       *ba = b->a;
5012 
5013   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5014   PetscInt        nonew = a->nonew;
5015   MatScalar       *ap1,*ap2;
5016 
5017   PetscFunctionBegin;
5018   for (i=0; i<m; i++) {
5019     if (im[i] < 0) continue;
5020 #if defined(PETSC_USE_DEBUG)
5021     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
5022 #endif
5023     if (im[i] >= rstart && im[i] < rend) {
5024       row      = im[i] - rstart;
5025       lastcol1 = -1;
5026       rp1      = aj + ai[row];
5027       ap1      = aa + ai[row];
5028       rmax1    = aimax[row];
5029       nrow1    = ailen[row];
5030       low1     = 0;
5031       high1    = nrow1;
5032       lastcol2 = -1;
5033       rp2      = bj + bi[row];
5034       ap2      = ba + bi[row];
5035       rmax2    = bimax[row];
5036       nrow2    = bilen[row];
5037       low2     = 0;
5038       high2    = nrow2;
5039 
5040       for (j=0; j<n; j++) {
5041         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5042         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5043         if (in[j] >= cstart && in[j] < cend){
5044           col = in[j] - cstart;
5045           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5046         } else if (in[j] < 0) continue;
5047 #if defined(PETSC_USE_DEBUG)
5048         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
5049 #endif
5050         else {
5051           if (mat->was_assembled) {
5052             if (!aij->colmap) {
5053               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5054             }
5055 #if defined (PETSC_USE_CTABLE)
5056             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5057 	    col--;
5058 #else
5059             col = aij->colmap[in[j]] - 1;
5060 #endif
5061             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5062               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5063               col =  in[j];
5064               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5065               B = aij->B;
5066               b = (Mat_SeqAIJ*)B->data;
5067               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5068               rp2      = bj + bi[row];
5069               ap2      = ba + bi[row];
5070               rmax2    = bimax[row];
5071               nrow2    = bilen[row];
5072               low2     = 0;
5073               high2    = nrow2;
5074               bm       = aij->B->rmap.n;
5075               ba = b->a;
5076             }
5077           } else col = in[j];
5078           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5079         }
5080       }
5081     } else {
5082       if (!aij->donotstash) {
5083         if (roworiented) {
5084           if (ignorezeroentries && v[i*n] == 0.0) continue;
5085           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5086         } else {
5087           if (ignorezeroentries && v[i] == 0.0) continue;
5088           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5089         }
5090       }
5091     }
5092   }}
5093   PetscFunctionReturnVoid();
5094 }
5095 EXTERN_C_END
5096 
5097