xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 32a366e4818484d294886ad81174ea261ee306ed)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "../src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine number diagonal and off-diagonal counts */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine number diagonal and off-diagonal counts */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0*/
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it it is not scalable (each processor
181 has an order N integer array but is fast to acess.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some Variables required in the macro */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
415   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are either in INSERTMODE or ADDMODE */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled, if so we must
517      also disassemble ourselfs, in order that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of number of messages and max length*/
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal allocating
674      new memory, thus calling lots of mallocs and slowing things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts: note this assumes the values are not actually */
764     /* added in yy until the next line, */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* values actually were received in the Begin() but we need to call this nop */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap->N;
932     header[2] = mat->cmap->N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap->n;
938     range = mat->rmap->range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap->n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap->n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
968   nzmax = nz; /* )th processor needs space a largest processor needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap->n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap->n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap->N > 1024) {
1114       PetscTruth flg;
1115 
1116       ierr = PetscOptionsHasName(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg);CHKERRQ(ierr);
1117       if (!flg) {
1118         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 512 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1119       }
1120     }
1121 
1122     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1123     if (!rank) {
1124       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1125     } else {
1126       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1127     }
1128     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1129     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1130     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1131     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1132 
1133     /* copy over the A part */
1134     Aloc = (Mat_SeqAIJ*)aij->A->data;
1135     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1136     row = mat->rmap->rstart;
1137     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1138     for (i=0; i<m; i++) {
1139       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1140       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1141     }
1142     aj = Aloc->j;
1143     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1144 
1145     /* copy over the B part */
1146     Aloc = (Mat_SeqAIJ*)aij->B->data;
1147     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1148     row  = mat->rmap->rstart;
1149     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1150     ct   = cols;
1151     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1155     }
1156     ierr = PetscFree(ct);CHKERRQ(ierr);
1157     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1158     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1159     /*
1160        Everyone has to call to draw the matrix since the graphics waits are
1161        synchronized across all processors that share the PetscDraw object
1162     */
1163     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1164     if (!rank) {
1165       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1166       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1167     }
1168     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1169     ierr = MatDestroy(A);CHKERRQ(ierr);
1170   }
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatView_MPIAIJ"
1176 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1177 {
1178   PetscErrorCode ierr;
1179   PetscTruth     iascii,isdraw,issocket,isbinary;
1180 
1181   PetscFunctionBegin;
1182   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1183   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1184   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1185   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1186   if (iascii || isdraw || isbinary || issocket) {
1187     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1188   } else {
1189     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1190   }
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatRelax_MPIAIJ"
1196 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1197 {
1198   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1199   PetscErrorCode ierr;
1200   Vec            bb1;
1201 
1202   PetscFunctionBegin;
1203   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1204 
1205   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1206     if (flag & SOR_ZERO_INITIAL_GUESS) {
1207       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1208       its--;
1209     }
1210 
1211     while (its--) {
1212       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1213       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1214 
1215       /* update rhs: bb1 = bb - B*x */
1216       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1217       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1218 
1219       /* local sweep */
1220       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1221     }
1222   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1223     if (flag & SOR_ZERO_INITIAL_GUESS) {
1224       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1225       its--;
1226     }
1227     while (its--) {
1228       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230 
1231       /* update rhs: bb1 = bb - B*x */
1232       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1233       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1234 
1235       /* local sweep */
1236       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1237     }
1238   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1239     if (flag & SOR_ZERO_INITIAL_GUESS) {
1240       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1241       its--;
1242     }
1243     while (its--) {
1244       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246 
1247       /* update rhs: bb1 = bb - B*x */
1248       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1249       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1250 
1251       /* local sweep */
1252       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1253     }
1254   } else {
1255     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1256   }
1257 
1258   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatPermute_MPIAIJ"
1264 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1265 {
1266   MPI_Comm       comm,pcomm;
1267   PetscInt       first,local_size,nrows;
1268   const PetscInt *rows;
1269   int            ntids;
1270   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1275   /* make a collective version of 'rowp' */
1276   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1277   if (pcomm==comm) {
1278     crowp = rowp;
1279   } else {
1280     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1281     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1282     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1283     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1284   }
1285   /* collect the global row permutation and invert it */
1286   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1287   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1288   if (pcomm!=comm) {
1289     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1290   }
1291   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1292   /* get the local target indices */
1293   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1294   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1295   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1296   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1297   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1298   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1299   /* the column permutation is so much easier;
1300      make a local version of 'colp' and invert it */
1301   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1302   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1303   if (ntids==1) {
1304     lcolp = colp;
1305   } else {
1306     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1307     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1308     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1309   }
1310   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1311   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1312   if (ntids>1) {
1313     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1314     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1315   }
1316   /* now we just get the submatrix */
1317   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1318   /* clean up */
1319   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1320   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1326 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1327 {
1328   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1329   Mat            A = mat->A,B = mat->B;
1330   PetscErrorCode ierr;
1331   PetscReal      isend[5],irecv[5];
1332 
1333   PetscFunctionBegin;
1334   info->block_size     = 1.0;
1335   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1336   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1337   isend[3] = info->memory;  isend[4] = info->mallocs;
1338   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1339   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1340   isend[3] += info->memory;  isend[4] += info->mallocs;
1341   if (flag == MAT_LOCAL) {
1342     info->nz_used      = isend[0];
1343     info->nz_allocated = isend[1];
1344     info->nz_unneeded  = isend[2];
1345     info->memory       = isend[3];
1346     info->mallocs      = isend[4];
1347   } else if (flag == MAT_GLOBAL_MAX) {
1348     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1349     info->nz_used      = irecv[0];
1350     info->nz_allocated = irecv[1];
1351     info->nz_unneeded  = irecv[2];
1352     info->memory       = irecv[3];
1353     info->mallocs      = irecv[4];
1354   } else if (flag == MAT_GLOBAL_SUM) {
1355     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1356     info->nz_used      = irecv[0];
1357     info->nz_allocated = irecv[1];
1358     info->nz_unneeded  = irecv[2];
1359     info->memory       = irecv[3];
1360     info->mallocs      = irecv[4];
1361   }
1362   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1363   info->fill_ratio_needed = 0;
1364   info->factor_mallocs    = 0;
1365 
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatSetOption_MPIAIJ"
1371 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1372 {
1373   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1374   PetscErrorCode ierr;
1375 
1376   PetscFunctionBegin;
1377   switch (op) {
1378   case MAT_NEW_NONZERO_LOCATIONS:
1379   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1380   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1381   case MAT_KEEP_ZEROED_ROWS:
1382   case MAT_NEW_NONZERO_LOCATION_ERR:
1383   case MAT_USE_INODES:
1384   case MAT_IGNORE_ZERO_ENTRIES:
1385     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1386     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1387     break;
1388   case MAT_ROW_ORIENTED:
1389     a->roworiented = flg;
1390     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1391     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1392     break;
1393   case MAT_NEW_DIAGONALS:
1394     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1395     break;
1396   case MAT_IGNORE_OFF_PROC_ENTRIES:
1397     a->donotstash = PETSC_TRUE;
1398     break;
1399   case MAT_SYMMETRIC:
1400     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1401     break;
1402   case MAT_STRUCTURALLY_SYMMETRIC:
1403   case MAT_HERMITIAN:
1404   case MAT_SYMMETRY_ETERNAL:
1405     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1406     break;
1407   default:
1408     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1409   }
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatGetRow_MPIAIJ"
1415 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1416 {
1417   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1418   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1419   PetscErrorCode ierr;
1420   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1421   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1422   PetscInt       *cmap,*idx_p;
1423 
1424   PetscFunctionBegin;
1425   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1426   mat->getrowactive = PETSC_TRUE;
1427 
1428   if (!mat->rowvalues && (idx || v)) {
1429     /*
1430         allocate enough space to hold information from the longest row.
1431     */
1432     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1433     PetscInt     max = 1,tmp;
1434     for (i=0; i<matin->rmap->n; i++) {
1435       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1436       if (max < tmp) { max = tmp; }
1437     }
1438     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1439     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1440   }
1441 
1442   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1443   lrow = row - rstart;
1444 
1445   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1446   if (!v)   {pvA = 0; pvB = 0;}
1447   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1448   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1449   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1450   nztot = nzA + nzB;
1451 
1452   cmap  = mat->garray;
1453   if (v  || idx) {
1454     if (nztot) {
1455       /* Sort by increasing column numbers, assuming A and B already sorted */
1456       PetscInt imark = -1;
1457       if (v) {
1458         *v = v_p = mat->rowvalues;
1459         for (i=0; i<nzB; i++) {
1460           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1461           else break;
1462         }
1463         imark = i;
1464         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1465         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1466       }
1467       if (idx) {
1468         *idx = idx_p = mat->rowindices;
1469         if (imark > -1) {
1470           for (i=0; i<imark; i++) {
1471             idx_p[i] = cmap[cworkB[i]];
1472           }
1473         } else {
1474           for (i=0; i<nzB; i++) {
1475             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1476             else break;
1477           }
1478           imark = i;
1479         }
1480         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1481         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1482       }
1483     } else {
1484       if (idx) *idx = 0;
1485       if (v)   *v   = 0;
1486     }
1487   }
1488   *nz = nztot;
1489   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1490   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1496 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1497 {
1498   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1499 
1500   PetscFunctionBegin;
1501   if (!aij->getrowactive) {
1502     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1503   }
1504   aij->getrowactive = PETSC_FALSE;
1505   PetscFunctionReturn(0);
1506 }
1507 
1508 #undef __FUNCT__
1509 #define __FUNCT__ "MatNorm_MPIAIJ"
1510 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1511 {
1512   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1513   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1514   PetscErrorCode ierr;
1515   PetscInt       i,j,cstart = mat->cmap->rstart;
1516   PetscReal      sum = 0.0;
1517   MatScalar      *v;
1518 
1519   PetscFunctionBegin;
1520   if (aij->size == 1) {
1521     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1522   } else {
1523     if (type == NORM_FROBENIUS) {
1524       v = amat->a;
1525       for (i=0; i<amat->nz; i++) {
1526 #if defined(PETSC_USE_COMPLEX)
1527         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1528 #else
1529         sum += (*v)*(*v); v++;
1530 #endif
1531       }
1532       v = bmat->a;
1533       for (i=0; i<bmat->nz; i++) {
1534 #if defined(PETSC_USE_COMPLEX)
1535         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1536 #else
1537         sum += (*v)*(*v); v++;
1538 #endif
1539       }
1540       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1541       *norm = sqrt(*norm);
1542     } else if (type == NORM_1) { /* max column norm */
1543       PetscReal *tmp,*tmp2;
1544       PetscInt  *jj,*garray = aij->garray;
1545       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1546       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1547       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1548       *norm = 0.0;
1549       v = amat->a; jj = amat->j;
1550       for (j=0; j<amat->nz; j++) {
1551         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1552       }
1553       v = bmat->a; jj = bmat->j;
1554       for (j=0; j<bmat->nz; j++) {
1555         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1556       }
1557       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1558       for (j=0; j<mat->cmap->N; j++) {
1559         if (tmp2[j] > *norm) *norm = tmp2[j];
1560       }
1561       ierr = PetscFree(tmp);CHKERRQ(ierr);
1562       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1563     } else if (type == NORM_INFINITY) { /* max row norm */
1564       PetscReal ntemp = 0.0;
1565       for (j=0; j<aij->A->rmap->n; j++) {
1566         v = amat->a + amat->i[j];
1567         sum = 0.0;
1568         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1569           sum += PetscAbsScalar(*v); v++;
1570         }
1571         v = bmat->a + bmat->i[j];
1572         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1573           sum += PetscAbsScalar(*v); v++;
1574         }
1575         if (sum > ntemp) ntemp = sum;
1576       }
1577       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1578     } else {
1579       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1580     }
1581   }
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatTranspose_MPIAIJ"
1587 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1588 {
1589   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1590   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1591   PetscErrorCode ierr;
1592   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1593   PetscInt       cstart=A->cmap->rstart,ncol;
1594   Mat            B;
1595   MatScalar      *array;
1596 
1597   PetscFunctionBegin;
1598   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1599 
1600   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1601   ai = Aloc->i; aj = Aloc->j;
1602   bi = Bloc->i; bj = Bloc->j;
1603   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1604     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1605     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1606     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1607     for (i=0; i<ai[ma]; i++){
1608       d_nnz[aj[i]] ++;
1609       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1610     }
1611 
1612     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1613     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1614     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1615     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1616     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1617   } else {
1618     B = *matout;
1619   }
1620 
1621   /* copy over the A part */
1622   array = Aloc->a;
1623   row = A->rmap->rstart;
1624   for (i=0; i<ma; i++) {
1625     ncol = ai[i+1]-ai[i];
1626     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1627     row++; array += ncol; aj += ncol;
1628   }
1629   aj = Aloc->j;
1630   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1631 
1632   /* copy over the B part */
1633   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1634   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1635   array = Bloc->a;
1636   row = A->rmap->rstart;
1637   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1638   cols_tmp = cols;
1639   for (i=0; i<mb; i++) {
1640     ncol = bi[i+1]-bi[i];
1641     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1642     row++; array += ncol; cols_tmp += ncol;
1643   }
1644   ierr = PetscFree(cols);CHKERRQ(ierr);
1645 
1646   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1647   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1648   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1649     *matout = B;
1650   } else {
1651     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1652   }
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1658 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1659 {
1660   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1661   Mat            a = aij->A,b = aij->B;
1662   PetscErrorCode ierr;
1663   PetscInt       s1,s2,s3;
1664 
1665   PetscFunctionBegin;
1666   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1667   if (rr) {
1668     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1669     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1670     /* Overlap communication with computation. */
1671     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1672   }
1673   if (ll) {
1674     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1675     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1676     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1677   }
1678   /* scale  the diagonal block */
1679   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1680 
1681   if (rr) {
1682     /* Do a scatter end and then right scale the off-diagonal block */
1683     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1684     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1685   }
1686 
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1692 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1693 {
1694   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1695   PetscErrorCode ierr;
1696 
1697   PetscFunctionBegin;
1698   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1699   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1700   PetscFunctionReturn(0);
1701 }
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1704 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1705 {
1706   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1707   PetscErrorCode ierr;
1708 
1709   PetscFunctionBegin;
1710   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatEqual_MPIAIJ"
1716 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1717 {
1718   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1719   Mat            a,b,c,d;
1720   PetscTruth     flg;
1721   PetscErrorCode ierr;
1722 
1723   PetscFunctionBegin;
1724   a = matA->A; b = matA->B;
1725   c = matB->A; d = matB->B;
1726 
1727   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1728   if (flg) {
1729     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1730   }
1731   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1732   PetscFunctionReturn(0);
1733 }
1734 
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatCopy_MPIAIJ"
1737 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1738 {
1739   PetscErrorCode ierr;
1740   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1741   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1742 
1743   PetscFunctionBegin;
1744   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1745   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1746     /* because of the column compression in the off-processor part of the matrix a->B,
1747        the number of columns in a->B and b->B may be different, hence we cannot call
1748        the MatCopy() directly on the two parts. If need be, we can provide a more
1749        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1750        then copying the submatrices */
1751     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1752   } else {
1753     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1754     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1755   }
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1761 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1762 {
1763   PetscErrorCode ierr;
1764 
1765   PetscFunctionBegin;
1766   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 #include "petscblaslapack.h"
1771 #undef __FUNCT__
1772 #define __FUNCT__ "MatAXPY_MPIAIJ"
1773 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1774 {
1775   PetscErrorCode ierr;
1776   PetscInt       i;
1777   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1778   PetscBLASInt   bnz,one=1;
1779   Mat_SeqAIJ     *x,*y;
1780 
1781   PetscFunctionBegin;
1782   if (str == SAME_NONZERO_PATTERN) {
1783     PetscScalar alpha = a;
1784     x = (Mat_SeqAIJ *)xx->A->data;
1785     y = (Mat_SeqAIJ *)yy->A->data;
1786     bnz = PetscBLASIntCast(x->nz);
1787     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1788     x = (Mat_SeqAIJ *)xx->B->data;
1789     y = (Mat_SeqAIJ *)yy->B->data;
1790     bnz = PetscBLASIntCast(x->nz);
1791     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1792   } else if (str == SUBSET_NONZERO_PATTERN) {
1793     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1794 
1795     x = (Mat_SeqAIJ *)xx->B->data;
1796     y = (Mat_SeqAIJ *)yy->B->data;
1797     if (y->xtoy && y->XtoY != xx->B) {
1798       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1799       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1800     }
1801     if (!y->xtoy) { /* get xtoy */
1802       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1803       y->XtoY = xx->B;
1804       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1805     }
1806     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1807   } else {
1808     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1809   }
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1814 
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatConjugate_MPIAIJ"
1817 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1818 {
1819 #if defined(PETSC_USE_COMPLEX)
1820   PetscErrorCode ierr;
1821   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1822 
1823   PetscFunctionBegin;
1824   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1825   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1826 #else
1827   PetscFunctionBegin;
1828 #endif
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 #undef __FUNCT__
1833 #define __FUNCT__ "MatRealPart_MPIAIJ"
1834 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1835 {
1836   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1837   PetscErrorCode ierr;
1838 
1839   PetscFunctionBegin;
1840   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1841   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 #undef __FUNCT__
1846 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1847 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1848 {
1849   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1850   PetscErrorCode ierr;
1851 
1852   PetscFunctionBegin;
1853   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1854   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 #ifdef PETSC_HAVE_PBGL
1859 
1860 #include <boost/parallel/mpi/bsp_process_group.hpp>
1861 #include <boost/graph/distributed/ilu_default_graph.hpp>
1862 #include <boost/graph/distributed/ilu_0_block.hpp>
1863 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1864 #include <boost/graph/distributed/petsc/interface.hpp>
1865 #include <boost/multi_array.hpp>
1866 #include <boost/parallel/distributed_property_map->hpp>
1867 
1868 #undef __FUNCT__
1869 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1870 /*
1871   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1872 */
1873 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1874 {
1875   namespace petsc = boost::distributed::petsc;
1876 
1877   namespace graph_dist = boost::graph::distributed;
1878   using boost::graph::distributed::ilu_default::process_group_type;
1879   using boost::graph::ilu_permuted;
1880 
1881   PetscTruth      row_identity, col_identity;
1882   PetscContainer  c;
1883   PetscInt        m, n, M, N;
1884   PetscErrorCode  ierr;
1885 
1886   PetscFunctionBegin;
1887   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1888   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1889   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1890   if (!row_identity || !col_identity) {
1891     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1892   }
1893 
1894   process_group_type pg;
1895   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1896   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1897   lgraph_type&   level_graph = *lgraph_p;
1898   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1899 
1900   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1901   ilu_permuted(level_graph);
1902 
1903   /* put together the new matrix */
1904   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1905   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1906   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1907   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1908   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1909   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1910   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1911 
1912   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1913   ierr = PetscContainerSetPointer(c, lgraph_p);
1914   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1920 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1921 {
1922   PetscFunctionBegin;
1923   PetscFunctionReturn(0);
1924 }
1925 
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatSolve_MPIAIJ"
1928 /*
1929   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1930 */
1931 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1932 {
1933   namespace graph_dist = boost::graph::distributed;
1934 
1935   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1936   lgraph_type*   lgraph_p;
1937   PetscContainer c;
1938   PetscErrorCode ierr;
1939 
1940   PetscFunctionBegin;
1941   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1942   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1943   ierr = VecCopy(b, x); CHKERRQ(ierr);
1944 
1945   PetscScalar* array_x;
1946   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1947   PetscInt sx;
1948   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1949 
1950   PetscScalar* array_b;
1951   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1952   PetscInt sb;
1953   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1954 
1955   lgraph_type&   level_graph = *lgraph_p;
1956   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1957 
1958   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1959   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1960                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1961 
1962   typedef boost::iterator_property_map<array_ref_type::iterator,
1963                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1964   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1965                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1966 
1967   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1968 
1969   PetscFunctionReturn(0);
1970 }
1971 #endif
1972 
1973 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1974   PetscInt       nzlocal,nsends,nrecvs;
1975   PetscMPIInt    *send_rank;
1976   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1977   PetscScalar    *sbuf_a,**rbuf_a;
1978   PetscErrorCode (*MatDestroy)(Mat);
1979 } Mat_Redundant;
1980 
1981 #undef __FUNCT__
1982 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1983 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1984 {
1985   PetscErrorCode       ierr;
1986   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1987   PetscInt             i;
1988 
1989   PetscFunctionBegin;
1990   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1991   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1992   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1993   for (i=0; i<redund->nrecvs; i++){
1994     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1995     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1996   }
1997   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1998   ierr = PetscFree(redund);CHKERRQ(ierr);
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 #undef __FUNCT__
2003 #define __FUNCT__ "MatDestroy_MatRedundant"
2004 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2005 {
2006   PetscErrorCode  ierr;
2007   PetscContainer  container;
2008   Mat_Redundant   *redund=PETSC_NULL;
2009 
2010   PetscFunctionBegin;
2011   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2012   if (container) {
2013     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2014   } else {
2015     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2016   }
2017   A->ops->destroy = redund->MatDestroy;
2018   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2019   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2020   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 #undef __FUNCT__
2025 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2026 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2027 {
2028   PetscMPIInt    rank,size;
2029   MPI_Comm       comm=((PetscObject)mat)->comm;
2030   PetscErrorCode ierr;
2031   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2032   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2033   PetscInt       *rowrange=mat->rmap->range;
2034   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2035   Mat            A=aij->A,B=aij->B,C=*matredundant;
2036   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2037   PetscScalar    *sbuf_a;
2038   PetscInt       nzlocal=a->nz+b->nz;
2039   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2040   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2041   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2042   MatScalar      *aworkA,*aworkB;
2043   PetscScalar    *vals;
2044   PetscMPIInt    tag1,tag2,tag3,imdex;
2045   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2046                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2047   MPI_Status     recv_status,*send_status;
2048   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2049   PetscInt       **rbuf_j=PETSC_NULL;
2050   PetscScalar    **rbuf_a=PETSC_NULL;
2051   Mat_Redundant  *redund=PETSC_NULL;
2052   PetscContainer container;
2053 
2054   PetscFunctionBegin;
2055   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2056   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2057 
2058   if (reuse == MAT_REUSE_MATRIX) {
2059     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2060     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2061     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2062     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2063     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2064     if (container) {
2065       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2066     } else {
2067       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2068     }
2069     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2070 
2071     nsends    = redund->nsends;
2072     nrecvs    = redund->nrecvs;
2073     send_rank = redund->send_rank; recv_rank = send_rank + size;
2074     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2075     sbuf_j    = redund->sbuf_j;
2076     sbuf_a    = redund->sbuf_a;
2077     rbuf_j    = redund->rbuf_j;
2078     rbuf_a    = redund->rbuf_a;
2079   }
2080 
2081   if (reuse == MAT_INITIAL_MATRIX){
2082     PetscMPIInt  subrank,subsize;
2083     PetscInt     nleftover,np_subcomm;
2084     /* get the destination processors' id send_rank, nsends and nrecvs */
2085     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2086     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2087     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
2088     recv_rank = send_rank + size;
2089     np_subcomm = size/nsubcomm;
2090     nleftover  = size - nsubcomm*np_subcomm;
2091     nsends = 0; nrecvs = 0;
2092     for (i=0; i<size; i++){ /* i=rank*/
2093       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2094         send_rank[nsends] = i; nsends++;
2095         recv_rank[nrecvs++] = i;
2096       }
2097     }
2098     if (rank >= size - nleftover){/* this proc is a leftover processor */
2099       i = size-nleftover-1;
2100       j = 0;
2101       while (j < nsubcomm - nleftover){
2102         send_rank[nsends++] = i;
2103         i--; j++;
2104       }
2105     }
2106 
2107     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2108       for (i=0; i<nleftover; i++){
2109         recv_rank[nrecvs++] = size-nleftover+i;
2110       }
2111     }
2112 
2113     /* allocate sbuf_j, sbuf_a */
2114     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2115     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2116     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2117   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2118 
2119   /* copy mat's local entries into the buffers */
2120   if (reuse == MAT_INITIAL_MATRIX){
2121     rownz_max = 0;
2122     rptr = sbuf_j;
2123     cols = sbuf_j + rend-rstart + 1;
2124     vals = sbuf_a;
2125     rptr[0] = 0;
2126     for (i=0; i<rend-rstart; i++){
2127       row = i + rstart;
2128       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2129       ncols  = nzA + nzB;
2130       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2131       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2132       /* load the column indices for this row into cols */
2133       lwrite = 0;
2134       for (l=0; l<nzB; l++) {
2135         if ((ctmp = bmap[cworkB[l]]) < cstart){
2136           vals[lwrite]   = aworkB[l];
2137           cols[lwrite++] = ctmp;
2138         }
2139       }
2140       for (l=0; l<nzA; l++){
2141         vals[lwrite]   = aworkA[l];
2142         cols[lwrite++] = cstart + cworkA[l];
2143       }
2144       for (l=0; l<nzB; l++) {
2145         if ((ctmp = bmap[cworkB[l]]) >= cend){
2146           vals[lwrite]   = aworkB[l];
2147           cols[lwrite++] = ctmp;
2148         }
2149       }
2150       vals += ncols;
2151       cols += ncols;
2152       rptr[i+1] = rptr[i] + ncols;
2153       if (rownz_max < ncols) rownz_max = ncols;
2154     }
2155     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2156   } else { /* only copy matrix values into sbuf_a */
2157     rptr = sbuf_j;
2158     vals = sbuf_a;
2159     rptr[0] = 0;
2160     for (i=0; i<rend-rstart; i++){
2161       row = i + rstart;
2162       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2163       ncols  = nzA + nzB;
2164       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2165       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2166       lwrite = 0;
2167       for (l=0; l<nzB; l++) {
2168         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2169       }
2170       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2171       for (l=0; l<nzB; l++) {
2172         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2173       }
2174       vals += ncols;
2175       rptr[i+1] = rptr[i] + ncols;
2176     }
2177   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2178 
2179   /* send nzlocal to others, and recv other's nzlocal */
2180   /*--------------------------------------------------*/
2181   if (reuse == MAT_INITIAL_MATRIX){
2182     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2183     s_waits2 = s_waits3 + nsends;
2184     s_waits1 = s_waits2 + nsends;
2185     r_waits1 = s_waits1 + nsends;
2186     r_waits2 = r_waits1 + nrecvs;
2187     r_waits3 = r_waits2 + nrecvs;
2188   } else {
2189     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2190     r_waits3 = s_waits3 + nsends;
2191   }
2192 
2193   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2194   if (reuse == MAT_INITIAL_MATRIX){
2195     /* get new tags to keep the communication clean */
2196     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2197     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2198     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2199     rbuf_nz = sbuf_nz + nsends;
2200 
2201     /* post receives of other's nzlocal */
2202     for (i=0; i<nrecvs; i++){
2203       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2204     }
2205     /* send nzlocal to others */
2206     for (i=0; i<nsends; i++){
2207       sbuf_nz[i] = nzlocal;
2208       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2209     }
2210     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2211     count = nrecvs;
2212     while (count) {
2213       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2214       recv_rank[imdex] = recv_status.MPI_SOURCE;
2215       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2216       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2217 
2218       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2219       rbuf_nz[imdex] += i + 2;
2220       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2221       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2222       count--;
2223     }
2224     /* wait on sends of nzlocal */
2225     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2226     /* send mat->i,j to others, and recv from other's */
2227     /*------------------------------------------------*/
2228     for (i=0; i<nsends; i++){
2229       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2230       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2231     }
2232     /* wait on receives of mat->i,j */
2233     /*------------------------------*/
2234     count = nrecvs;
2235     while (count) {
2236       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2237       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2238       count--;
2239     }
2240     /* wait on sends of mat->i,j */
2241     /*---------------------------*/
2242     if (nsends) {
2243       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2244     }
2245   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2246 
2247   /* post receives, send and receive mat->a */
2248   /*----------------------------------------*/
2249   for (imdex=0; imdex<nrecvs; imdex++) {
2250     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2251   }
2252   for (i=0; i<nsends; i++){
2253     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2254   }
2255   count = nrecvs;
2256   while (count) {
2257     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2258     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2259     count--;
2260   }
2261   if (nsends) {
2262     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2263   }
2264 
2265   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2266 
2267   /* create redundant matrix */
2268   /*-------------------------*/
2269   if (reuse == MAT_INITIAL_MATRIX){
2270     /* compute rownz_max for preallocation */
2271     for (imdex=0; imdex<nrecvs; imdex++){
2272       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2273       rptr = rbuf_j[imdex];
2274       for (i=0; i<j; i++){
2275         ncols = rptr[i+1] - rptr[i];
2276         if (rownz_max < ncols) rownz_max = ncols;
2277       }
2278     }
2279 
2280     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2281     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2282     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2283     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2284     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2285   } else {
2286     C = *matredundant;
2287   }
2288 
2289   /* insert local matrix entries */
2290   rptr = sbuf_j;
2291   cols = sbuf_j + rend-rstart + 1;
2292   vals = sbuf_a;
2293   for (i=0; i<rend-rstart; i++){
2294     row   = i + rstart;
2295     ncols = rptr[i+1] - rptr[i];
2296     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2297     vals += ncols;
2298     cols += ncols;
2299   }
2300   /* insert received matrix entries */
2301   for (imdex=0; imdex<nrecvs; imdex++){
2302     rstart = rowrange[recv_rank[imdex]];
2303     rend   = rowrange[recv_rank[imdex]+1];
2304     rptr = rbuf_j[imdex];
2305     cols = rbuf_j[imdex] + rend-rstart + 1;
2306     vals = rbuf_a[imdex];
2307     for (i=0; i<rend-rstart; i++){
2308       row   = i + rstart;
2309       ncols = rptr[i+1] - rptr[i];
2310       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2311       vals += ncols;
2312       cols += ncols;
2313     }
2314   }
2315   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2316   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2317   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2318   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2319   if (reuse == MAT_INITIAL_MATRIX){
2320     PetscContainer container;
2321     *matredundant = C;
2322     /* create a supporting struct and attach it to C for reuse */
2323     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2324     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2325     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2326     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2327     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2328 
2329     redund->nzlocal = nzlocal;
2330     redund->nsends  = nsends;
2331     redund->nrecvs  = nrecvs;
2332     redund->send_rank = send_rank;
2333     redund->sbuf_nz = sbuf_nz;
2334     redund->sbuf_j  = sbuf_j;
2335     redund->sbuf_a  = sbuf_a;
2336     redund->rbuf_j  = rbuf_j;
2337     redund->rbuf_a  = rbuf_a;
2338 
2339     redund->MatDestroy = C->ops->destroy;
2340     C->ops->destroy    = MatDestroy_MatRedundant;
2341   }
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 #undef __FUNCT__
2346 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2347 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2348 {
2349   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2350   PetscErrorCode ierr;
2351   PetscInt       i,*idxb = 0;
2352   PetscScalar    *va,*vb;
2353   Vec            vtmp;
2354 
2355   PetscFunctionBegin;
2356   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2357   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2358   if (idx) {
2359     for (i=0; i<A->rmap->n; i++) {
2360       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2361     }
2362   }
2363 
2364   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2365   if (idx) {
2366     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2367   }
2368   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2369   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2370 
2371   for (i=0; i<A->rmap->n; i++){
2372     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2373       va[i] = vb[i];
2374       if (idx) idx[i] = a->garray[idxb[i]];
2375     }
2376   }
2377 
2378   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2379   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2380   if (idxb) {
2381     ierr = PetscFree(idxb);CHKERRQ(ierr);
2382   }
2383   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2389 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2390 {
2391   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2392   PetscErrorCode ierr;
2393   PetscInt       i,*idxb = 0;
2394   PetscScalar    *va,*vb;
2395   Vec            vtmp;
2396 
2397   PetscFunctionBegin;
2398   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2399   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2400   if (idx) {
2401     for (i=0; i<A->cmap->n; i++) {
2402       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2403     }
2404   }
2405 
2406   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2407   if (idx) {
2408     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2409   }
2410   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2411   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2412 
2413   for (i=0; i<A->rmap->n; i++){
2414     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2415       va[i] = vb[i];
2416       if (idx) idx[i] = a->garray[idxb[i]];
2417     }
2418   }
2419 
2420   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2421   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2422   if (idxb) {
2423     ierr = PetscFree(idxb);CHKERRQ(ierr);
2424   }
2425   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2426   PetscFunctionReturn(0);
2427 }
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2431 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2432 {
2433   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2434   PetscInt       n      = A->rmap->n;
2435   PetscInt       cstart = A->cmap->rstart;
2436   PetscInt      *cmap   = mat->garray;
2437   PetscInt      *diagIdx, *offdiagIdx;
2438   Vec            diagV, offdiagV;
2439   PetscScalar   *a, *diagA, *offdiagA;
2440   PetscInt       r;
2441   PetscErrorCode ierr;
2442 
2443   PetscFunctionBegin;
2444   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2445   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2446   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2447   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2448   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2449   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2450   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2451   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2452   for(r = 0; r < n; ++r) {
2453     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2454       a[r]   = diagA[r];
2455       idx[r] = cstart + diagIdx[r];
2456     } else {
2457       a[r]   = offdiagA[r];
2458       idx[r] = cmap[offdiagIdx[r]];
2459     }
2460   }
2461   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2462   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2463   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2464   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2465   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2466   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2472 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2473 {
2474   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2475   PetscInt       n      = A->rmap->n;
2476   PetscInt       cstart = A->cmap->rstart;
2477   PetscInt      *cmap   = mat->garray;
2478   PetscInt      *diagIdx, *offdiagIdx;
2479   Vec            diagV, offdiagV;
2480   PetscScalar   *a, *diagA, *offdiagA;
2481   PetscInt       r;
2482   PetscErrorCode ierr;
2483 
2484   PetscFunctionBegin;
2485   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2486   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2487   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2488   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2489   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2490   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2491   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2492   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2493   for(r = 0; r < n; ++r) {
2494     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2495       a[r]   = diagA[r];
2496       idx[r] = cstart + diagIdx[r];
2497     } else {
2498       a[r]   = offdiagA[r];
2499       idx[r] = cmap[offdiagIdx[r]];
2500     }
2501   }
2502   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2503   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2504   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2505   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2506   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2507   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2508   PetscFunctionReturn(0);
2509 }
2510 
2511 #undef __FUNCT__
2512 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2513 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2514 {
2515   PetscErrorCode ierr;
2516 
2517   PetscFunctionBegin;
2518   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 /* -------------------------------------------------------------------*/
2523 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2524        MatGetRow_MPIAIJ,
2525        MatRestoreRow_MPIAIJ,
2526        MatMult_MPIAIJ,
2527 /* 4*/ MatMultAdd_MPIAIJ,
2528        MatMultTranspose_MPIAIJ,
2529        MatMultTransposeAdd_MPIAIJ,
2530 #ifdef PETSC_HAVE_PBGL
2531        MatSolve_MPIAIJ,
2532 #else
2533        0,
2534 #endif
2535        0,
2536        0,
2537 /*10*/ 0,
2538        0,
2539        0,
2540        MatRelax_MPIAIJ,
2541        MatTranspose_MPIAIJ,
2542 /*15*/ MatGetInfo_MPIAIJ,
2543        MatEqual_MPIAIJ,
2544        MatGetDiagonal_MPIAIJ,
2545        MatDiagonalScale_MPIAIJ,
2546        MatNorm_MPIAIJ,
2547 /*20*/ MatAssemblyBegin_MPIAIJ,
2548        MatAssemblyEnd_MPIAIJ,
2549        0,
2550        MatSetOption_MPIAIJ,
2551        MatZeroEntries_MPIAIJ,
2552 /*25*/ MatZeroRows_MPIAIJ,
2553        0,
2554 #ifdef PETSC_HAVE_PBGL
2555        0,
2556 #else
2557        0,
2558 #endif
2559        0,
2560        0,
2561 /*30*/ MatSetUpPreallocation_MPIAIJ,
2562 #ifdef PETSC_HAVE_PBGL
2563        0,
2564 #else
2565        0,
2566 #endif
2567        0,
2568        0,
2569        0,
2570 /*35*/ MatDuplicate_MPIAIJ,
2571        0,
2572        0,
2573        0,
2574        0,
2575 /*40*/ MatAXPY_MPIAIJ,
2576        MatGetSubMatrices_MPIAIJ,
2577        MatIncreaseOverlap_MPIAIJ,
2578        MatGetValues_MPIAIJ,
2579        MatCopy_MPIAIJ,
2580 /*45*/ MatGetRowMax_MPIAIJ,
2581        MatScale_MPIAIJ,
2582        0,
2583        0,
2584        0,
2585 /*50*/ MatSetBlockSize_MPIAIJ,
2586        0,
2587        0,
2588        0,
2589        0,
2590 /*55*/ MatFDColoringCreate_MPIAIJ,
2591        0,
2592        MatSetUnfactored_MPIAIJ,
2593        MatPermute_MPIAIJ,
2594        0,
2595 /*60*/ MatGetSubMatrix_MPIAIJ,
2596        MatDestroy_MPIAIJ,
2597        MatView_MPIAIJ,
2598        0,
2599        0,
2600 /*65*/ 0,
2601        0,
2602        0,
2603        0,
2604        0,
2605 /*70*/ MatGetRowMaxAbs_MPIAIJ,
2606        MatGetRowMinAbs_MPIAIJ,
2607        0,
2608        MatSetColoring_MPIAIJ,
2609 #if defined(PETSC_HAVE_ADIC)
2610        MatSetValuesAdic_MPIAIJ,
2611 #else
2612        0,
2613 #endif
2614        MatSetValuesAdifor_MPIAIJ,
2615 /*75*/ 0,
2616        0,
2617        0,
2618        0,
2619        0,
2620 /*80*/ 0,
2621        0,
2622        0,
2623 /*84*/ MatLoad_MPIAIJ,
2624        0,
2625        0,
2626        0,
2627        0,
2628        0,
2629 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2630        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2631        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2632        MatPtAP_Basic,
2633        MatPtAPSymbolic_MPIAIJ,
2634 /*95*/ MatPtAPNumeric_MPIAIJ,
2635        0,
2636        0,
2637        0,
2638        0,
2639 /*100*/0,
2640        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2641        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2642        MatConjugate_MPIAIJ,
2643        0,
2644 /*105*/MatSetValuesRow_MPIAIJ,
2645        MatRealPart_MPIAIJ,
2646        MatImaginaryPart_MPIAIJ,
2647        0,
2648        0,
2649 /*110*/0,
2650        MatGetRedundantMatrix_MPIAIJ,
2651        MatGetRowMin_MPIAIJ,
2652        0,
2653        0,
2654 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2655 
2656 /* ----------------------------------------------------------------------------------------*/
2657 
2658 EXTERN_C_BEGIN
2659 #undef __FUNCT__
2660 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2661 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2662 {
2663   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2664   PetscErrorCode ierr;
2665 
2666   PetscFunctionBegin;
2667   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2668   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2669   PetscFunctionReturn(0);
2670 }
2671 EXTERN_C_END
2672 
2673 EXTERN_C_BEGIN
2674 #undef __FUNCT__
2675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2676 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2677 {
2678   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2679   PetscErrorCode ierr;
2680 
2681   PetscFunctionBegin;
2682   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2683   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2684   PetscFunctionReturn(0);
2685 }
2686 EXTERN_C_END
2687 
2688 #include "petscpc.h"
2689 EXTERN_C_BEGIN
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2692 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2693 {
2694   Mat_MPIAIJ     *b;
2695   PetscErrorCode ierr;
2696   PetscInt       i;
2697 
2698   PetscFunctionBegin;
2699   B->preallocated = PETSC_TRUE;
2700   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2701   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2702   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2703   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2704 
2705   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2706   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2707   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2708   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2709   if (d_nnz) {
2710     for (i=0; i<B->rmap->n; i++) {
2711       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2712     }
2713   }
2714   if (o_nnz) {
2715     for (i=0; i<B->rmap->n; i++) {
2716       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2717     }
2718   }
2719   b = (Mat_MPIAIJ*)B->data;
2720 
2721   /* Explicitly create 2 MATSEQAIJ matrices. */
2722   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2723   ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2724   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2725   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2726   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2727   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2728   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2729   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2730 
2731   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2732   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2733 
2734   PetscFunctionReturn(0);
2735 }
2736 EXTERN_C_END
2737 
2738 #undef __FUNCT__
2739 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2740 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2741 {
2742   Mat            mat;
2743   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   *newmat       = 0;
2748   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2749   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2750   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2751   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2752   a    = (Mat_MPIAIJ*)mat->data;
2753 
2754   mat->factor       = matin->factor;
2755   mat->rmap->bs      = matin->rmap->bs;
2756   mat->assembled    = PETSC_TRUE;
2757   mat->insertmode   = NOT_SET_VALUES;
2758   mat->preallocated = PETSC_TRUE;
2759 
2760   a->size           = oldmat->size;
2761   a->rank           = oldmat->rank;
2762   a->donotstash     = oldmat->donotstash;
2763   a->roworiented    = oldmat->roworiented;
2764   a->rowindices     = 0;
2765   a->rowvalues      = 0;
2766   a->getrowactive   = PETSC_FALSE;
2767 
2768   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2769   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2770 
2771   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2772   if (oldmat->colmap) {
2773 #if defined (PETSC_USE_CTABLE)
2774     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2775 #else
2776     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2777     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2778     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2779 #endif
2780   } else a->colmap = 0;
2781   if (oldmat->garray) {
2782     PetscInt len;
2783     len  = oldmat->B->cmap->n;
2784     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2785     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2786     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2787   } else a->garray = 0;
2788 
2789   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2790   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2791   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2792   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2793   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2794   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2795   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2796   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2797   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2798   *newmat = mat;
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 #include "petscsys.h"
2803 
2804 #undef __FUNCT__
2805 #define __FUNCT__ "MatLoad_MPIAIJ"
2806 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2807 {
2808   Mat            A;
2809   PetscScalar    *vals,*svals;
2810   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2811   MPI_Status     status;
2812   PetscErrorCode ierr;
2813   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2814   PetscInt       i,nz,j,rstart,rend,mmax;
2815   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2816   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2817   PetscInt       cend,cstart,n,*rowners;
2818   int            fd;
2819 
2820   PetscFunctionBegin;
2821   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2822   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2823   if (!rank) {
2824     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2825     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2826     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2827   }
2828 
2829   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2830   M = header[1]; N = header[2];
2831   /* determine ownership of all rows */
2832   m    = M/size + ((M % size) > rank);
2833   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2834   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2835 
2836   /* First process needs enough room for process with most rows */
2837   if (!rank) {
2838     mmax       = rowners[1];
2839     for (i=2; i<size; i++) {
2840       mmax = PetscMax(mmax,rowners[i]);
2841     }
2842   } else mmax = m;
2843 
2844   rowners[0] = 0;
2845   for (i=2; i<=size; i++) {
2846     rowners[i] += rowners[i-1];
2847   }
2848   rstart = rowners[rank];
2849   rend   = rowners[rank+1];
2850 
2851   /* distribute row lengths to all processors */
2852   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2853   if (!rank) {
2854     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2855     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2856     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2857     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2858     for (j=0; j<m; j++) {
2859       procsnz[0] += ourlens[j];
2860     }
2861     for (i=1; i<size; i++) {
2862       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2863       /* calculate the number of nonzeros on each processor */
2864       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2865         procsnz[i] += rowlengths[j];
2866       }
2867       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2868     }
2869     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2870   } else {
2871     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2872   }
2873 
2874   if (!rank) {
2875     /* determine max buffer needed and allocate it */
2876     maxnz = 0;
2877     for (i=0; i<size; i++) {
2878       maxnz = PetscMax(maxnz,procsnz[i]);
2879     }
2880     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2881 
2882     /* read in my part of the matrix column indices  */
2883     nz   = procsnz[0];
2884     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2885     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2886 
2887     /* read in every one elses and ship off */
2888     for (i=1; i<size; i++) {
2889       nz   = procsnz[i];
2890       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2891       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2892     }
2893     ierr = PetscFree(cols);CHKERRQ(ierr);
2894   } else {
2895     /* determine buffer space needed for message */
2896     nz = 0;
2897     for (i=0; i<m; i++) {
2898       nz += ourlens[i];
2899     }
2900     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2901 
2902     /* receive message of column indices*/
2903     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2904     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2905     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2906   }
2907 
2908   /* determine column ownership if matrix is not square */
2909   if (N != M) {
2910     n      = N/size + ((N % size) > rank);
2911     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2912     cstart = cend - n;
2913   } else {
2914     cstart = rstart;
2915     cend   = rend;
2916     n      = cend - cstart;
2917   }
2918 
2919   /* loop over local rows, determining number of off diagonal entries */
2920   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2921   jj = 0;
2922   for (i=0; i<m; i++) {
2923     for (j=0; j<ourlens[i]; j++) {
2924       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2925       jj++;
2926     }
2927   }
2928 
2929   /* create our matrix */
2930   for (i=0; i<m; i++) {
2931     ourlens[i] -= offlens[i];
2932   }
2933   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2934   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2935   ierr = MatSetType(A,type);CHKERRQ(ierr);
2936   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2937 
2938   for (i=0; i<m; i++) {
2939     ourlens[i] += offlens[i];
2940   }
2941 
2942   if (!rank) {
2943     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2944 
2945     /* read in my part of the matrix numerical values  */
2946     nz   = procsnz[0];
2947     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2948 
2949     /* insert into matrix */
2950     jj      = rstart;
2951     smycols = mycols;
2952     svals   = vals;
2953     for (i=0; i<m; i++) {
2954       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2955       smycols += ourlens[i];
2956       svals   += ourlens[i];
2957       jj++;
2958     }
2959 
2960     /* read in other processors and ship out */
2961     for (i=1; i<size; i++) {
2962       nz   = procsnz[i];
2963       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2964       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2965     }
2966     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2967   } else {
2968     /* receive numeric values */
2969     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2970 
2971     /* receive message of values*/
2972     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2973     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2974     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2975 
2976     /* insert into matrix */
2977     jj      = rstart;
2978     smycols = mycols;
2979     svals   = vals;
2980     for (i=0; i<m; i++) {
2981       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2982       smycols += ourlens[i];
2983       svals   += ourlens[i];
2984       jj++;
2985     }
2986   }
2987   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2988   ierr = PetscFree(vals);CHKERRQ(ierr);
2989   ierr = PetscFree(mycols);CHKERRQ(ierr);
2990   ierr = PetscFree(rowners);CHKERRQ(ierr);
2991 
2992   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2993   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2994   *newmat = A;
2995   PetscFunctionReturn(0);
2996 }
2997 
2998 #undef __FUNCT__
2999 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3000 /*
3001     Not great since it makes two copies of the submatrix, first an SeqAIJ
3002   in local and then by concatenating the local matrices the end result.
3003   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3004 */
3005 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3006 {
3007   PetscErrorCode ierr;
3008   PetscMPIInt    rank,size;
3009   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3010   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3011   Mat            *local,M,Mreuse;
3012   MatScalar      *vwork,*aa;
3013   MPI_Comm       comm = ((PetscObject)mat)->comm;
3014   Mat_SeqAIJ     *aij;
3015 
3016 
3017   PetscFunctionBegin;
3018   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3019   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3020 
3021   if (call ==  MAT_REUSE_MATRIX) {
3022     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3023     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3024     local = &Mreuse;
3025     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3026   } else {
3027     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3028     Mreuse = *local;
3029     ierr   = PetscFree(local);CHKERRQ(ierr);
3030   }
3031 
3032   /*
3033       m - number of local rows
3034       n - number of columns (same on all processors)
3035       rstart - first row in new global matrix generated
3036   */
3037   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3038   if (call == MAT_INITIAL_MATRIX) {
3039     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3040     ii  = aij->i;
3041     jj  = aij->j;
3042 
3043     /*
3044         Determine the number of non-zeros in the diagonal and off-diagonal
3045         portions of the matrix in order to do correct preallocation
3046     */
3047 
3048     /* first get start and end of "diagonal" columns */
3049     if (csize == PETSC_DECIDE) {
3050       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3051       if (mglobal == n) { /* square matrix */
3052 	nlocal = m;
3053       } else {
3054         nlocal = n/size + ((n % size) > rank);
3055       }
3056     } else {
3057       nlocal = csize;
3058     }
3059     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3060     rstart = rend - nlocal;
3061     if (rank == size - 1 && rend != n) {
3062       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3063     }
3064 
3065     /* next, compute all the lengths */
3066     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3067     olens = dlens + m;
3068     for (i=0; i<m; i++) {
3069       jend = ii[i+1] - ii[i];
3070       olen = 0;
3071       dlen = 0;
3072       for (j=0; j<jend; j++) {
3073         if (*jj < rstart || *jj >= rend) olen++;
3074         else dlen++;
3075         jj++;
3076       }
3077       olens[i] = olen;
3078       dlens[i] = dlen;
3079     }
3080     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3081     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3082     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3083     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3084     ierr = PetscFree(dlens);CHKERRQ(ierr);
3085   } else {
3086     PetscInt ml,nl;
3087 
3088     M = *newmat;
3089     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3090     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3091     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3092     /*
3093          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3094        rather than the slower MatSetValues().
3095     */
3096     M->was_assembled = PETSC_TRUE;
3097     M->assembled     = PETSC_FALSE;
3098   }
3099   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3100   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3101   ii  = aij->i;
3102   jj  = aij->j;
3103   aa  = aij->a;
3104   for (i=0; i<m; i++) {
3105     row   = rstart + i;
3106     nz    = ii[i+1] - ii[i];
3107     cwork = jj;     jj += nz;
3108     vwork = aa;     aa += nz;
3109     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3110   }
3111 
3112   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3113   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3114   *newmat = M;
3115 
3116   /* save submatrix used in processor for next request */
3117   if (call ==  MAT_INITIAL_MATRIX) {
3118     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3119     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3120   }
3121 
3122   PetscFunctionReturn(0);
3123 }
3124 
3125 EXTERN_C_BEGIN
3126 #undef __FUNCT__
3127 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3128 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3129 {
3130   PetscInt       m,cstart, cend,j,nnz,i,d;
3131   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3132   const PetscInt *JJ;
3133   PetscScalar    *values;
3134   PetscErrorCode ierr;
3135 
3136   PetscFunctionBegin;
3137   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3138 
3139   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3140   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3141   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3142   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3143   m      = B->rmap->n;
3144   cstart = B->cmap->rstart;
3145   cend   = B->cmap->rend;
3146   rstart = B->rmap->rstart;
3147 
3148   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3149   o_nnz = d_nnz + m;
3150 
3151 #if defined(PETSC_USE_DEBUGGING)
3152   for (i=0; i<m; i++) {
3153     nnz     = Ii[i+1]- Ii[i];
3154     JJ      = J + Ii[i];
3155     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3156     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3157     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3158     for (j=1; j<nnz; j++) {
3159       if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3160     }
3161   }
3162 #endif
3163 
3164   for (i=0; i<m; i++) {
3165     nnz     = Ii[i+1]- Ii[i];
3166     JJ      = J + Ii[i];
3167     nnz_max = PetscMax(nnz_max,nnz);
3168     for (j=0; j<nnz; j++) {
3169       if (*JJ >= cstart) break;
3170       JJ++;
3171     }
3172     d = 0;
3173     for (; j<nnz; j++) {
3174       if (*JJ++ >= cend) break;
3175       d++;
3176     }
3177     d_nnz[i] = d;
3178     o_nnz[i] = nnz - d;
3179   }
3180   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3181   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3182 
3183   if (v) values = (PetscScalar*)v;
3184   else {
3185     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3186     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3187   }
3188 
3189   for (i=0; i<m; i++) {
3190     ii   = i + rstart;
3191     nnz  = Ii[i+1]- Ii[i];
3192     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3193   }
3194   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3195   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3196 
3197   if (!v) {
3198     ierr = PetscFree(values);CHKERRQ(ierr);
3199   }
3200   PetscFunctionReturn(0);
3201 }
3202 EXTERN_C_END
3203 
3204 #undef __FUNCT__
3205 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3206 /*@
3207    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3208    (the default parallel PETSc format).
3209 
3210    Collective on MPI_Comm
3211 
3212    Input Parameters:
3213 +  B - the matrix
3214 .  i - the indices into j for the start of each local row (starts with zero)
3215 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3216 -  v - optional values in the matrix
3217 
3218    Level: developer
3219 
3220    Notes:
3221        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3222      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3223      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3224 
3225        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3226 
3227        The format which is used for the sparse matrix input, is equivalent to a
3228     row-major ordering.. i.e for the following matrix, the input data expected is
3229     as shown:
3230 
3231         1 0 0
3232         2 0 3     P0
3233        -------
3234         4 5 6     P1
3235 
3236      Process0 [P0]: rows_owned=[0,1]
3237         i =  {0,1,3}  [size = nrow+1  = 2+1]
3238         j =  {0,0,2}  [size = nz = 6]
3239         v =  {1,2,3}  [size = nz = 6]
3240 
3241      Process1 [P1]: rows_owned=[2]
3242         i =  {0,3}    [size = nrow+1  = 1+1]
3243         j =  {0,1,2}  [size = nz = 6]
3244         v =  {4,5,6}  [size = nz = 6]
3245 
3246       The column indices for each row MUST be sorted.
3247 
3248 .keywords: matrix, aij, compressed row, sparse, parallel
3249 
3250 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3251           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3252 @*/
3253 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3254 {
3255   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3256 
3257   PetscFunctionBegin;
3258   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3259   if (f) {
3260     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3261   }
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 #undef __FUNCT__
3266 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3267 /*@C
3268    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3269    (the default parallel PETSc format).  For good matrix assembly performance
3270    the user should preallocate the matrix storage by setting the parameters
3271    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3272    performance can be increased by more than a factor of 50.
3273 
3274    Collective on MPI_Comm
3275 
3276    Input Parameters:
3277 +  A - the matrix
3278 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3279            (same value is used for all local rows)
3280 .  d_nnz - array containing the number of nonzeros in the various rows of the
3281            DIAGONAL portion of the local submatrix (possibly different for each row)
3282            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3283            The size of this array is equal to the number of local rows, i.e 'm'.
3284            You must leave room for the diagonal entry even if it is zero.
3285 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3286            submatrix (same value is used for all local rows).
3287 -  o_nnz - array containing the number of nonzeros in the various rows of the
3288            OFF-DIAGONAL portion of the local submatrix (possibly different for
3289            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3290            structure. The size of this array is equal to the number
3291            of local rows, i.e 'm'.
3292 
3293    If the *_nnz parameter is given then the *_nz parameter is ignored
3294 
3295    The AIJ format (also called the Yale sparse matrix format or
3296    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3297    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3298 
3299    The parallel matrix is partitioned such that the first m0 rows belong to
3300    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3301    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3302 
3303    The DIAGONAL portion of the local submatrix of a processor can be defined
3304    as the submatrix which is obtained by extraction the part corresponding
3305    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3306    first row that belongs to the processor, and r2 is the last row belonging
3307    to the this processor. This is a square mxm matrix. The remaining portion
3308    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3309 
3310    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3311 
3312    You can call MatGetInfo() to get information on how effective the preallocation was;
3313    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3314    You can also run with the option -info and look for messages with the string
3315    malloc in them to see if additional memory allocation was needed.
3316 
3317    Example usage:
3318 
3319    Consider the following 8x8 matrix with 34 non-zero values, that is
3320    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3321    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3322    as follows:
3323 
3324 .vb
3325             1  2  0  |  0  3  0  |  0  4
3326     Proc0   0  5  6  |  7  0  0  |  8  0
3327             9  0 10  | 11  0  0  | 12  0
3328     -------------------------------------
3329            13  0 14  | 15 16 17  |  0  0
3330     Proc1   0 18  0  | 19 20 21  |  0  0
3331             0  0  0  | 22 23  0  | 24  0
3332     -------------------------------------
3333     Proc2  25 26 27  |  0  0 28  | 29  0
3334            30  0  0  | 31 32 33  |  0 34
3335 .ve
3336 
3337    This can be represented as a collection of submatrices as:
3338 
3339 .vb
3340       A B C
3341       D E F
3342       G H I
3343 .ve
3344 
3345    Where the submatrices A,B,C are owned by proc0, D,E,F are
3346    owned by proc1, G,H,I are owned by proc2.
3347 
3348    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3349    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3350    The 'M','N' parameters are 8,8, and have the same values on all procs.
3351 
3352    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3353    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3354    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3355    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3356    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3357    matrix, ans [DF] as another SeqAIJ matrix.
3358 
3359    When d_nz, o_nz parameters are specified, d_nz storage elements are
3360    allocated for every row of the local diagonal submatrix, and o_nz
3361    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3362    One way to choose d_nz and o_nz is to use the max nonzerors per local
3363    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3364    In this case, the values of d_nz,o_nz are:
3365 .vb
3366      proc0 : dnz = 2, o_nz = 2
3367      proc1 : dnz = 3, o_nz = 2
3368      proc2 : dnz = 1, o_nz = 4
3369 .ve
3370    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3371    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3372    for proc3. i.e we are using 12+15+10=37 storage locations to store
3373    34 values.
3374 
3375    When d_nnz, o_nnz parameters are specified, the storage is specified
3376    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3377    In the above case the values for d_nnz,o_nnz are:
3378 .vb
3379      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3380      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3381      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3382 .ve
3383    Here the space allocated is sum of all the above values i.e 34, and
3384    hence pre-allocation is perfect.
3385 
3386    Level: intermediate
3387 
3388 .keywords: matrix, aij, compressed row, sparse, parallel
3389 
3390 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3391           MPIAIJ, MatGetInfo()
3392 @*/
3393 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3394 {
3395   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3396 
3397   PetscFunctionBegin;
3398   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3399   if (f) {
3400     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3401   }
3402   PetscFunctionReturn(0);
3403 }
3404 
3405 #undef __FUNCT__
3406 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3407 /*@
3408      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3409          CSR format the local rows.
3410 
3411    Collective on MPI_Comm
3412 
3413    Input Parameters:
3414 +  comm - MPI communicator
3415 .  m - number of local rows (Cannot be PETSC_DECIDE)
3416 .  n - This value should be the same as the local size used in creating the
3417        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3418        calculated if N is given) For square matrices n is almost always m.
3419 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3420 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3421 .   i - row indices
3422 .   j - column indices
3423 -   a - matrix values
3424 
3425    Output Parameter:
3426 .   mat - the matrix
3427 
3428    Level: intermediate
3429 
3430    Notes:
3431        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3432      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3433      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3434 
3435        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3436 
3437        The format which is used for the sparse matrix input, is equivalent to a
3438     row-major ordering.. i.e for the following matrix, the input data expected is
3439     as shown:
3440 
3441         1 0 0
3442         2 0 3     P0
3443        -------
3444         4 5 6     P1
3445 
3446      Process0 [P0]: rows_owned=[0,1]
3447         i =  {0,1,3}  [size = nrow+1  = 2+1]
3448         j =  {0,0,2}  [size = nz = 6]
3449         v =  {1,2,3}  [size = nz = 6]
3450 
3451      Process1 [P1]: rows_owned=[2]
3452         i =  {0,3}    [size = nrow+1  = 1+1]
3453         j =  {0,1,2}  [size = nz = 6]
3454         v =  {4,5,6}  [size = nz = 6]
3455 
3456 .keywords: matrix, aij, compressed row, sparse, parallel
3457 
3458 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3459           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3460 @*/
3461 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3462 {
3463   PetscErrorCode ierr;
3464 
3465  PetscFunctionBegin;
3466   if (i[0]) {
3467     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3468   }
3469   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3470   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3471   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3472   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3473   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3474   PetscFunctionReturn(0);
3475 }
3476 
3477 #undef __FUNCT__
3478 #define __FUNCT__ "MatCreateMPIAIJ"
3479 /*@C
3480    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3481    (the default parallel PETSc format).  For good matrix assembly performance
3482    the user should preallocate the matrix storage by setting the parameters
3483    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3484    performance can be increased by more than a factor of 50.
3485 
3486    Collective on MPI_Comm
3487 
3488    Input Parameters:
3489 +  comm - MPI communicator
3490 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3491            This value should be the same as the local size used in creating the
3492            y vector for the matrix-vector product y = Ax.
3493 .  n - This value should be the same as the local size used in creating the
3494        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3495        calculated if N is given) For square matrices n is almost always m.
3496 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3497 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3498 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3499            (same value is used for all local rows)
3500 .  d_nnz - array containing the number of nonzeros in the various rows of the
3501            DIAGONAL portion of the local submatrix (possibly different for each row)
3502            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3503            The size of this array is equal to the number of local rows, i.e 'm'.
3504            You must leave room for the diagonal entry even if it is zero.
3505 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3506            submatrix (same value is used for all local rows).
3507 -  o_nnz - array containing the number of nonzeros in the various rows of the
3508            OFF-DIAGONAL portion of the local submatrix (possibly different for
3509            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3510            structure. The size of this array is equal to the number
3511            of local rows, i.e 'm'.
3512 
3513    Output Parameter:
3514 .  A - the matrix
3515 
3516    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3517    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3518    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3519 
3520    Notes:
3521    If the *_nnz parameter is given then the *_nz parameter is ignored
3522 
3523    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3524    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3525    storage requirements for this matrix.
3526 
3527    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3528    processor than it must be used on all processors that share the object for
3529    that argument.
3530 
3531    The user MUST specify either the local or global matrix dimensions
3532    (possibly both).
3533 
3534    The parallel matrix is partitioned across processors such that the
3535    first m0 rows belong to process 0, the next m1 rows belong to
3536    process 1, the next m2 rows belong to process 2 etc.. where
3537    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3538    values corresponding to [m x N] submatrix.
3539 
3540    The columns are logically partitioned with the n0 columns belonging
3541    to 0th partition, the next n1 columns belonging to the next
3542    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3543 
3544    The DIAGONAL portion of the local submatrix on any given processor
3545    is the submatrix corresponding to the rows and columns m,n
3546    corresponding to the given processor. i.e diagonal matrix on
3547    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3548    etc. The remaining portion of the local submatrix [m x (N-n)]
3549    constitute the OFF-DIAGONAL portion. The example below better
3550    illustrates this concept.
3551 
3552    For a square global matrix we define each processor's diagonal portion
3553    to be its local rows and the corresponding columns (a square submatrix);
3554    each processor's off-diagonal portion encompasses the remainder of the
3555    local matrix (a rectangular submatrix).
3556 
3557    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3558 
3559    When calling this routine with a single process communicator, a matrix of
3560    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3561    type of communicator, use the construction mechanism:
3562      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3563 
3564    By default, this format uses inodes (identical nodes) when possible.
3565    We search for consecutive rows with the same nonzero structure, thereby
3566    reusing matrix information to achieve increased efficiency.
3567 
3568    Options Database Keys:
3569 +  -mat_no_inode  - Do not use inodes
3570 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3571 -  -mat_aij_oneindex - Internally use indexing starting at 1
3572         rather than 0.  Note that when calling MatSetValues(),
3573         the user still MUST index entries starting at 0!
3574 
3575 
3576    Example usage:
3577 
3578    Consider the following 8x8 matrix with 34 non-zero values, that is
3579    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3580    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3581    as follows:
3582 
3583 .vb
3584             1  2  0  |  0  3  0  |  0  4
3585     Proc0   0  5  6  |  7  0  0  |  8  0
3586             9  0 10  | 11  0  0  | 12  0
3587     -------------------------------------
3588            13  0 14  | 15 16 17  |  0  0
3589     Proc1   0 18  0  | 19 20 21  |  0  0
3590             0  0  0  | 22 23  0  | 24  0
3591     -------------------------------------
3592     Proc2  25 26 27  |  0  0 28  | 29  0
3593            30  0  0  | 31 32 33  |  0 34
3594 .ve
3595 
3596    This can be represented as a collection of submatrices as:
3597 
3598 .vb
3599       A B C
3600       D E F
3601       G H I
3602 .ve
3603 
3604    Where the submatrices A,B,C are owned by proc0, D,E,F are
3605    owned by proc1, G,H,I are owned by proc2.
3606 
3607    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3608    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3609    The 'M','N' parameters are 8,8, and have the same values on all procs.
3610 
3611    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3612    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3613    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3614    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3615    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3616    matrix, ans [DF] as another SeqAIJ matrix.
3617 
3618    When d_nz, o_nz parameters are specified, d_nz storage elements are
3619    allocated for every row of the local diagonal submatrix, and o_nz
3620    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3621    One way to choose d_nz and o_nz is to use the max nonzerors per local
3622    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3623    In this case, the values of d_nz,o_nz are:
3624 .vb
3625      proc0 : dnz = 2, o_nz = 2
3626      proc1 : dnz = 3, o_nz = 2
3627      proc2 : dnz = 1, o_nz = 4
3628 .ve
3629    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3630    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3631    for proc3. i.e we are using 12+15+10=37 storage locations to store
3632    34 values.
3633 
3634    When d_nnz, o_nnz parameters are specified, the storage is specified
3635    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3636    In the above case the values for d_nnz,o_nnz are:
3637 .vb
3638      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3639      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3640      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3641 .ve
3642    Here the space allocated is sum of all the above values i.e 34, and
3643    hence pre-allocation is perfect.
3644 
3645    Level: intermediate
3646 
3647 .keywords: matrix, aij, compressed row, sparse, parallel
3648 
3649 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3650           MPIAIJ, MatCreateMPIAIJWithArrays()
3651 @*/
3652 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3653 {
3654   PetscErrorCode ierr;
3655   PetscMPIInt    size;
3656 
3657   PetscFunctionBegin;
3658   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3659   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3660   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3661   if (size > 1) {
3662     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3663     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3664   } else {
3665     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3666     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3667   }
3668   PetscFunctionReturn(0);
3669 }
3670 
3671 #undef __FUNCT__
3672 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3673 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3674 {
3675   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3676 
3677   PetscFunctionBegin;
3678   *Ad     = a->A;
3679   *Ao     = a->B;
3680   *colmap = a->garray;
3681   PetscFunctionReturn(0);
3682 }
3683 
3684 #undef __FUNCT__
3685 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3686 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3687 {
3688   PetscErrorCode ierr;
3689   PetscInt       i;
3690   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3691 
3692   PetscFunctionBegin;
3693   if (coloring->ctype == IS_COLORING_GLOBAL) {
3694     ISColoringValue *allcolors,*colors;
3695     ISColoring      ocoloring;
3696 
3697     /* set coloring for diagonal portion */
3698     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3699 
3700     /* set coloring for off-diagonal portion */
3701     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3702     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3703     for (i=0; i<a->B->cmap->n; i++) {
3704       colors[i] = allcolors[a->garray[i]];
3705     }
3706     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3707     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3708     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3709     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3710   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3711     ISColoringValue *colors;
3712     PetscInt        *larray;
3713     ISColoring      ocoloring;
3714 
3715     /* set coloring for diagonal portion */
3716     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3717     for (i=0; i<a->A->cmap->n; i++) {
3718       larray[i] = i + A->cmap->rstart;
3719     }
3720     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3721     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3722     for (i=0; i<a->A->cmap->n; i++) {
3723       colors[i] = coloring->colors[larray[i]];
3724     }
3725     ierr = PetscFree(larray);CHKERRQ(ierr);
3726     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3727     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3728     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3729 
3730     /* set coloring for off-diagonal portion */
3731     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3732     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3733     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3734     for (i=0; i<a->B->cmap->n; i++) {
3735       colors[i] = coloring->colors[larray[i]];
3736     }
3737     ierr = PetscFree(larray);CHKERRQ(ierr);
3738     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3739     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3740     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3741   } else {
3742     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3743   }
3744 
3745   PetscFunctionReturn(0);
3746 }
3747 
3748 #if defined(PETSC_HAVE_ADIC)
3749 #undef __FUNCT__
3750 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3751 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3752 {
3753   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3754   PetscErrorCode ierr;
3755 
3756   PetscFunctionBegin;
3757   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3758   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3759   PetscFunctionReturn(0);
3760 }
3761 #endif
3762 
3763 #undef __FUNCT__
3764 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3765 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3766 {
3767   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3768   PetscErrorCode ierr;
3769 
3770   PetscFunctionBegin;
3771   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3772   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3773   PetscFunctionReturn(0);
3774 }
3775 
3776 #undef __FUNCT__
3777 #define __FUNCT__ "MatMerge"
3778 /*@
3779       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3780                  matrices from each processor
3781 
3782     Collective on MPI_Comm
3783 
3784    Input Parameters:
3785 +    comm - the communicators the parallel matrix will live on
3786 .    inmat - the input sequential matrices
3787 .    n - number of local columns (or PETSC_DECIDE)
3788 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3789 
3790    Output Parameter:
3791 .    outmat - the parallel matrix generated
3792 
3793     Level: advanced
3794 
3795    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3796 
3797 @*/
3798 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3799 {
3800   PetscErrorCode ierr;
3801   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3802   PetscInt       *indx;
3803   PetscScalar    *values;
3804 
3805   PetscFunctionBegin;
3806   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3807   if (scall == MAT_INITIAL_MATRIX){
3808     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3809     if (n == PETSC_DECIDE){
3810       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3811     }
3812     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3813     rstart -= m;
3814 
3815     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3816     for (i=0;i<m;i++) {
3817       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3818       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3819       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3820     }
3821     /* This routine will ONLY return MPIAIJ type matrix */
3822     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3823     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3824     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3825     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3826     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3827 
3828   } else if (scall == MAT_REUSE_MATRIX){
3829     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3830   } else {
3831     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3832   }
3833 
3834   for (i=0;i<m;i++) {
3835     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3836     Ii    = i + rstart;
3837     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3838     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3839   }
3840   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3841   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3842   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3843 
3844   PetscFunctionReturn(0);
3845 }
3846 
3847 #undef __FUNCT__
3848 #define __FUNCT__ "MatFileSplit"
3849 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3850 {
3851   PetscErrorCode    ierr;
3852   PetscMPIInt       rank;
3853   PetscInt          m,N,i,rstart,nnz;
3854   size_t            len;
3855   const PetscInt    *indx;
3856   PetscViewer       out;
3857   char              *name;
3858   Mat               B;
3859   const PetscScalar *values;
3860 
3861   PetscFunctionBegin;
3862   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3863   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3864   /* Should this be the type of the diagonal block of A? */
3865   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3866   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3867   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3868   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3869   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3870   for (i=0;i<m;i++) {
3871     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3872     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3873     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3874   }
3875   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3876   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3877 
3878   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3879   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3880   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3881   sprintf(name,"%s.%d",outfile,rank);
3882   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3883   ierr = PetscFree(name);
3884   ierr = MatView(B,out);CHKERRQ(ierr);
3885   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3886   ierr = MatDestroy(B);CHKERRQ(ierr);
3887   PetscFunctionReturn(0);
3888 }
3889 
3890 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3891 #undef __FUNCT__
3892 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3893 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3894 {
3895   PetscErrorCode       ierr;
3896   Mat_Merge_SeqsToMPI  *merge;
3897   PetscContainer       container;
3898 
3899   PetscFunctionBegin;
3900   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3901   if (container) {
3902     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3903     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3904     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3905     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3906     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3907     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3908     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3909     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3910     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3911     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3912     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3913     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3914 
3915     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3916     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3917   }
3918   ierr = PetscFree(merge);CHKERRQ(ierr);
3919 
3920   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3921   PetscFunctionReturn(0);
3922 }
3923 
3924 #include "../src/mat/utils/freespace.h"
3925 #include "petscbt.h"
3926 
3927 #undef __FUNCT__
3928 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3929 /*@C
3930       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3931                  matrices from each processor
3932 
3933     Collective on MPI_Comm
3934 
3935    Input Parameters:
3936 +    comm - the communicators the parallel matrix will live on
3937 .    seqmat - the input sequential matrices
3938 .    m - number of local rows (or PETSC_DECIDE)
3939 .    n - number of local columns (or PETSC_DECIDE)
3940 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3941 
3942    Output Parameter:
3943 .    mpimat - the parallel matrix generated
3944 
3945     Level: advanced
3946 
3947    Notes:
3948      The dimensions of the sequential matrix in each processor MUST be the same.
3949      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3950      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3951 @*/
3952 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3953 {
3954   PetscErrorCode       ierr;
3955   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3956   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3957   PetscMPIInt          size,rank,taga,*len_s;
3958   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
3959   PetscInt             proc,m;
3960   PetscInt             **buf_ri,**buf_rj;
3961   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3962   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3963   MPI_Request          *s_waits,*r_waits;
3964   MPI_Status           *status;
3965   MatScalar            *aa=a->a;
3966   MatScalar            **abuf_r,*ba_i;
3967   Mat_Merge_SeqsToMPI  *merge;
3968   PetscContainer       container;
3969 
3970   PetscFunctionBegin;
3971   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3972 
3973   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3974   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3975 
3976   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3977   if (container) {
3978     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3979   }
3980   bi     = merge->bi;
3981   bj     = merge->bj;
3982   buf_ri = merge->buf_ri;
3983   buf_rj = merge->buf_rj;
3984 
3985   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3986   owners = merge->rowmap.range;
3987   len_s  = merge->len_s;
3988 
3989   /* send and recv matrix values */
3990   /*-----------------------------*/
3991   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3992   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3993 
3994   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3995   for (proc=0,k=0; proc<size; proc++){
3996     if (!len_s[proc]) continue;
3997     i = owners[proc];
3998     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3999     k++;
4000   }
4001 
4002   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4003   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4004   ierr = PetscFree(status);CHKERRQ(ierr);
4005 
4006   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4007   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4008 
4009   /* insert mat values of mpimat */
4010   /*----------------------------*/
4011   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4012   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4013   nextrow = buf_ri_k + merge->nrecv;
4014   nextai  = nextrow + merge->nrecv;
4015 
4016   for (k=0; k<merge->nrecv; k++){
4017     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4018     nrows = *(buf_ri_k[k]);
4019     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4020     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4021   }
4022 
4023   /* set values of ba */
4024   m = merge->rowmap.n;
4025   for (i=0; i<m; i++) {
4026     arow = owners[rank] + i;
4027     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4028     bnzi = bi[i+1] - bi[i];
4029     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4030 
4031     /* add local non-zero vals of this proc's seqmat into ba */
4032     anzi = ai[arow+1] - ai[arow];
4033     aj   = a->j + ai[arow];
4034     aa   = a->a + ai[arow];
4035     nextaj = 0;
4036     for (j=0; nextaj<anzi; j++){
4037       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4038         ba_i[j] += aa[nextaj++];
4039       }
4040     }
4041 
4042     /* add received vals into ba */
4043     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4044       /* i-th row */
4045       if (i == *nextrow[k]) {
4046         anzi = *(nextai[k]+1) - *nextai[k];
4047         aj   = buf_rj[k] + *(nextai[k]);
4048         aa   = abuf_r[k] + *(nextai[k]);
4049         nextaj = 0;
4050         for (j=0; nextaj<anzi; j++){
4051           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4052             ba_i[j] += aa[nextaj++];
4053           }
4054         }
4055         nextrow[k]++; nextai[k]++;
4056       }
4057     }
4058     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4059   }
4060   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4061   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4062 
4063   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4064   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4065   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4066   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4067   PetscFunctionReturn(0);
4068 }
4069 
4070 #undef __FUNCT__
4071 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4072 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4073 {
4074   PetscErrorCode       ierr;
4075   Mat                  B_mpi;
4076   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4077   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4078   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4079   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4080   PetscInt             len,proc,*dnz,*onz;
4081   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4082   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4083   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4084   MPI_Status           *status;
4085   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4086   PetscBT              lnkbt;
4087   Mat_Merge_SeqsToMPI  *merge;
4088   PetscContainer       container;
4089 
4090   PetscFunctionBegin;
4091   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4092 
4093   /* make sure it is a PETSc comm */
4094   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4095   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4096   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4097 
4098   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4099   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4100 
4101   /* determine row ownership */
4102   /*---------------------------------------------------------*/
4103   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4104   merge->rowmap.n = m;
4105   merge->rowmap.N = M;
4106   merge->rowmap.bs = 1;
4107   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4108   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4109   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4110 
4111   m      = merge->rowmap.n;
4112   M      = merge->rowmap.N;
4113   owners = merge->rowmap.range;
4114 
4115   /* determine the number of messages to send, their lengths */
4116   /*---------------------------------------------------------*/
4117   len_s  = merge->len_s;
4118 
4119   len = 0;  /* length of buf_si[] */
4120   merge->nsend = 0;
4121   for (proc=0; proc<size; proc++){
4122     len_si[proc] = 0;
4123     if (proc == rank){
4124       len_s[proc] = 0;
4125     } else {
4126       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4127       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4128     }
4129     if (len_s[proc]) {
4130       merge->nsend++;
4131       nrows = 0;
4132       for (i=owners[proc]; i<owners[proc+1]; i++){
4133         if (ai[i+1] > ai[i]) nrows++;
4134       }
4135       len_si[proc] = 2*(nrows+1);
4136       len += len_si[proc];
4137     }
4138   }
4139 
4140   /* determine the number and length of messages to receive for ij-structure */
4141   /*-------------------------------------------------------------------------*/
4142   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4143   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4144 
4145   /* post the Irecv of j-structure */
4146   /*-------------------------------*/
4147   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4148   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4149 
4150   /* post the Isend of j-structure */
4151   /*--------------------------------*/
4152   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4153   sj_waits = si_waits + merge->nsend;
4154 
4155   for (proc=0, k=0; proc<size; proc++){
4156     if (!len_s[proc]) continue;
4157     i = owners[proc];
4158     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4159     k++;
4160   }
4161 
4162   /* receives and sends of j-structure are complete */
4163   /*------------------------------------------------*/
4164   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4165   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4166 
4167   /* send and recv i-structure */
4168   /*---------------------------*/
4169   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4170   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4171 
4172   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4173   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4174   for (proc=0,k=0; proc<size; proc++){
4175     if (!len_s[proc]) continue;
4176     /* form outgoing message for i-structure:
4177          buf_si[0]:                 nrows to be sent
4178                [1:nrows]:           row index (global)
4179                [nrows+1:2*nrows+1]: i-structure index
4180     */
4181     /*-------------------------------------------*/
4182     nrows = len_si[proc]/2 - 1;
4183     buf_si_i    = buf_si + nrows+1;
4184     buf_si[0]   = nrows;
4185     buf_si_i[0] = 0;
4186     nrows = 0;
4187     for (i=owners[proc]; i<owners[proc+1]; i++){
4188       anzi = ai[i+1] - ai[i];
4189       if (anzi) {
4190         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4191         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4192         nrows++;
4193       }
4194     }
4195     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4196     k++;
4197     buf_si += len_si[proc];
4198   }
4199 
4200   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4201   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4202 
4203   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4204   for (i=0; i<merge->nrecv; i++){
4205     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4206   }
4207 
4208   ierr = PetscFree(len_si);CHKERRQ(ierr);
4209   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4210   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4211   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4212   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4213   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4214   ierr = PetscFree(status);CHKERRQ(ierr);
4215 
4216   /* compute a local seq matrix in each processor */
4217   /*----------------------------------------------*/
4218   /* allocate bi array and free space for accumulating nonzero column info */
4219   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4220   bi[0] = 0;
4221 
4222   /* create and initialize a linked list */
4223   nlnk = N+1;
4224   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4225 
4226   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4227   len = 0;
4228   len  = ai[owners[rank+1]] - ai[owners[rank]];
4229   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4230   current_space = free_space;
4231 
4232   /* determine symbolic info for each local row */
4233   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4234   nextrow = buf_ri_k + merge->nrecv;
4235   nextai  = nextrow + merge->nrecv;
4236   for (k=0; k<merge->nrecv; k++){
4237     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4238     nrows = *buf_ri_k[k];
4239     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4240     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4241   }
4242 
4243   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4244   len = 0;
4245   for (i=0;i<m;i++) {
4246     bnzi   = 0;
4247     /* add local non-zero cols of this proc's seqmat into lnk */
4248     arow   = owners[rank] + i;
4249     anzi   = ai[arow+1] - ai[arow];
4250     aj     = a->j + ai[arow];
4251     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4252     bnzi += nlnk;
4253     /* add received col data into lnk */
4254     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4255       if (i == *nextrow[k]) { /* i-th row */
4256         anzi = *(nextai[k]+1) - *nextai[k];
4257         aj   = buf_rj[k] + *nextai[k];
4258         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4259         bnzi += nlnk;
4260         nextrow[k]++; nextai[k]++;
4261       }
4262     }
4263     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4264 
4265     /* if free space is not available, make more free space */
4266     if (current_space->local_remaining<bnzi) {
4267       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4268       nspacedouble++;
4269     }
4270     /* copy data into free space, then initialize lnk */
4271     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4272     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4273 
4274     current_space->array           += bnzi;
4275     current_space->local_used      += bnzi;
4276     current_space->local_remaining -= bnzi;
4277 
4278     bi[i+1] = bi[i] + bnzi;
4279   }
4280 
4281   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4282 
4283   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4284   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4285   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4286 
4287   /* create symbolic parallel matrix B_mpi */
4288   /*---------------------------------------*/
4289   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4290   if (n==PETSC_DECIDE) {
4291     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4292   } else {
4293     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4294   }
4295   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4296   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4297   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4298 
4299   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4300   B_mpi->assembled     = PETSC_FALSE;
4301   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4302   merge->bi            = bi;
4303   merge->bj            = bj;
4304   merge->buf_ri        = buf_ri;
4305   merge->buf_rj        = buf_rj;
4306   merge->coi           = PETSC_NULL;
4307   merge->coj           = PETSC_NULL;
4308   merge->owners_co     = PETSC_NULL;
4309 
4310   /* attach the supporting struct to B_mpi for reuse */
4311   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4312   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4313   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4314   *mpimat = B_mpi;
4315 
4316   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4317   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4318   PetscFunctionReturn(0);
4319 }
4320 
4321 #undef __FUNCT__
4322 #define __FUNCT__ "MatMerge_SeqsToMPI"
4323 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4324 {
4325   PetscErrorCode   ierr;
4326 
4327   PetscFunctionBegin;
4328   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4329   if (scall == MAT_INITIAL_MATRIX){
4330     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4331   }
4332   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4333   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4334   PetscFunctionReturn(0);
4335 }
4336 
4337 #undef __FUNCT__
4338 #define __FUNCT__ "MatGetLocalMat"
4339 /*@
4340      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4341 
4342     Not Collective
4343 
4344    Input Parameters:
4345 +    A - the matrix
4346 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4347 
4348    Output Parameter:
4349 .    A_loc - the local sequential matrix generated
4350 
4351     Level: developer
4352 
4353 @*/
4354 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4355 {
4356   PetscErrorCode  ierr;
4357   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4358   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4359   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4360   MatScalar       *aa=a->a,*ba=b->a,*cam;
4361   PetscScalar     *ca;
4362   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4363   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4364 
4365   PetscFunctionBegin;
4366   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4367   if (scall == MAT_INITIAL_MATRIX){
4368     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4369     ci[0] = 0;
4370     for (i=0; i<am; i++){
4371       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4372     }
4373     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4374     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4375     k = 0;
4376     for (i=0; i<am; i++) {
4377       ncols_o = bi[i+1] - bi[i];
4378       ncols_d = ai[i+1] - ai[i];
4379       /* off-diagonal portion of A */
4380       for (jo=0; jo<ncols_o; jo++) {
4381         col = cmap[*bj];
4382         if (col >= cstart) break;
4383         cj[k]   = col; bj++;
4384         ca[k++] = *ba++;
4385       }
4386       /* diagonal portion of A */
4387       for (j=0; j<ncols_d; j++) {
4388         cj[k]   = cstart + *aj++;
4389         ca[k++] = *aa++;
4390       }
4391       /* off-diagonal portion of A */
4392       for (j=jo; j<ncols_o; j++) {
4393         cj[k]   = cmap[*bj++];
4394         ca[k++] = *ba++;
4395       }
4396     }
4397     /* put together the new matrix */
4398     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4399     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4400     /* Since these are PETSc arrays, change flags to free them as necessary. */
4401     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4402     mat->free_a  = PETSC_TRUE;
4403     mat->free_ij = PETSC_TRUE;
4404     mat->nonew   = 0;
4405   } else if (scall == MAT_REUSE_MATRIX){
4406     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4407     ci = mat->i; cj = mat->j; cam = mat->a;
4408     for (i=0; i<am; i++) {
4409       /* off-diagonal portion of A */
4410       ncols_o = bi[i+1] - bi[i];
4411       for (jo=0; jo<ncols_o; jo++) {
4412         col = cmap[*bj];
4413         if (col >= cstart) break;
4414         *cam++ = *ba++; bj++;
4415       }
4416       /* diagonal portion of A */
4417       ncols_d = ai[i+1] - ai[i];
4418       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4419       /* off-diagonal portion of A */
4420       for (j=jo; j<ncols_o; j++) {
4421         *cam++ = *ba++; bj++;
4422       }
4423     }
4424   } else {
4425     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4426   }
4427 
4428   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4429   PetscFunctionReturn(0);
4430 }
4431 
4432 #undef __FUNCT__
4433 #define __FUNCT__ "MatGetLocalMatCondensed"
4434 /*@C
4435      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4436 
4437     Not Collective
4438 
4439    Input Parameters:
4440 +    A - the matrix
4441 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4442 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4443 
4444    Output Parameter:
4445 .    A_loc - the local sequential matrix generated
4446 
4447     Level: developer
4448 
4449 @*/
4450 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4451 {
4452   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4453   PetscErrorCode    ierr;
4454   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4455   IS                isrowa,iscola;
4456   Mat               *aloc;
4457 
4458   PetscFunctionBegin;
4459   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4460   if (!row){
4461     start = A->rmap->rstart; end = A->rmap->rend;
4462     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4463   } else {
4464     isrowa = *row;
4465   }
4466   if (!col){
4467     start = A->cmap->rstart;
4468     cmap  = a->garray;
4469     nzA   = a->A->cmap->n;
4470     nzB   = a->B->cmap->n;
4471     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4472     ncols = 0;
4473     for (i=0; i<nzB; i++) {
4474       if (cmap[i] < start) idx[ncols++] = cmap[i];
4475       else break;
4476     }
4477     imark = i;
4478     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4479     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4480     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4481     ierr = PetscFree(idx);CHKERRQ(ierr);
4482   } else {
4483     iscola = *col;
4484   }
4485   if (scall != MAT_INITIAL_MATRIX){
4486     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4487     aloc[0] = *A_loc;
4488   }
4489   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4490   *A_loc = aloc[0];
4491   ierr = PetscFree(aloc);CHKERRQ(ierr);
4492   if (!row){
4493     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4494   }
4495   if (!col){
4496     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4497   }
4498   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4499   PetscFunctionReturn(0);
4500 }
4501 
4502 #undef __FUNCT__
4503 #define __FUNCT__ "MatGetBrowsOfAcols"
4504 /*@C
4505     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4506 
4507     Collective on Mat
4508 
4509    Input Parameters:
4510 +    A,B - the matrices in mpiaij format
4511 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4512 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4513 
4514    Output Parameter:
4515 +    rowb, colb - index sets of rows and columns of B to extract
4516 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4517 -    B_seq - the sequential matrix generated
4518 
4519     Level: developer
4520 
4521 @*/
4522 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4523 {
4524   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4525   PetscErrorCode    ierr;
4526   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4527   IS                isrowb,iscolb;
4528   Mat               *bseq;
4529 
4530   PetscFunctionBegin;
4531   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4532     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4533   }
4534   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4535 
4536   if (scall == MAT_INITIAL_MATRIX){
4537     start = A->cmap->rstart;
4538     cmap  = a->garray;
4539     nzA   = a->A->cmap->n;
4540     nzB   = a->B->cmap->n;
4541     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4542     ncols = 0;
4543     for (i=0; i<nzB; i++) {  /* row < local row index */
4544       if (cmap[i] < start) idx[ncols++] = cmap[i];
4545       else break;
4546     }
4547     imark = i;
4548     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4549     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4550     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4551     ierr = PetscFree(idx);CHKERRQ(ierr);
4552     *brstart = imark;
4553     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4554   } else {
4555     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4556     isrowb = *rowb; iscolb = *colb;
4557     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4558     bseq[0] = *B_seq;
4559   }
4560   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4561   *B_seq = bseq[0];
4562   ierr = PetscFree(bseq);CHKERRQ(ierr);
4563   if (!rowb){
4564     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4565   } else {
4566     *rowb = isrowb;
4567   }
4568   if (!colb){
4569     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4570   } else {
4571     *colb = iscolb;
4572   }
4573   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4574   PetscFunctionReturn(0);
4575 }
4576 
4577 #undef __FUNCT__
4578 #define __FUNCT__ "MatGetBrowsOfAoCols"
4579 /*@C
4580     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4581     of the OFF-DIAGONAL portion of local A
4582 
4583     Collective on Mat
4584 
4585    Input Parameters:
4586 +    A,B - the matrices in mpiaij format
4587 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4588 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4589 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4590 
4591    Output Parameter:
4592 +    B_oth - the sequential matrix generated
4593 
4594     Level: developer
4595 
4596 @*/
4597 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4598 {
4599   VecScatter_MPI_General *gen_to,*gen_from;
4600   PetscErrorCode         ierr;
4601   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4602   Mat_SeqAIJ             *b_oth;
4603   VecScatter             ctx=a->Mvctx;
4604   MPI_Comm               comm=((PetscObject)ctx)->comm;
4605   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4606   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4607   PetscScalar            *rvalues,*svalues;
4608   MatScalar              *b_otha,*bufa,*bufA;
4609   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4610   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4611   MPI_Status             *sstatus,rstatus;
4612   PetscMPIInt            jj;
4613   PetscInt               *cols,sbs,rbs;
4614   PetscScalar            *vals;
4615 
4616   PetscFunctionBegin;
4617   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4618     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4619   }
4620   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4621   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4622 
4623   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4624   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4625   rvalues  = gen_from->values; /* holds the length of receiving row */
4626   svalues  = gen_to->values;   /* holds the length of sending row */
4627   nrecvs   = gen_from->n;
4628   nsends   = gen_to->n;
4629 
4630   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4631   srow     = gen_to->indices;   /* local row index to be sent */
4632   sstarts  = gen_to->starts;
4633   sprocs   = gen_to->procs;
4634   sstatus  = gen_to->sstatus;
4635   sbs      = gen_to->bs;
4636   rstarts  = gen_from->starts;
4637   rprocs   = gen_from->procs;
4638   rbs      = gen_from->bs;
4639 
4640   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4641   if (scall == MAT_INITIAL_MATRIX){
4642     /* i-array */
4643     /*---------*/
4644     /*  post receives */
4645     for (i=0; i<nrecvs; i++){
4646       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4647       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4648       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4649     }
4650 
4651     /* pack the outgoing message */
4652     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4653     rstartsj = sstartsj + nsends +1;
4654     sstartsj[0] = 0;  rstartsj[0] = 0;
4655     len = 0; /* total length of j or a array to be sent */
4656     k = 0;
4657     for (i=0; i<nsends; i++){
4658       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4659       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4660       for (j=0; j<nrows; j++) {
4661         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4662         for (l=0; l<sbs; l++){
4663           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4664           rowlen[j*sbs+l] = ncols;
4665           len += ncols;
4666           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4667         }
4668         k++;
4669       }
4670       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4671       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4672     }
4673     /* recvs and sends of i-array are completed */
4674     i = nrecvs;
4675     while (i--) {
4676       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4677     }
4678     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4679 
4680     /* allocate buffers for sending j and a arrays */
4681     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4682     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4683 
4684     /* create i-array of B_oth */
4685     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4686     b_othi[0] = 0;
4687     len = 0; /* total length of j or a array to be received */
4688     k = 0;
4689     for (i=0; i<nrecvs; i++){
4690       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4691       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4692       for (j=0; j<nrows; j++) {
4693         b_othi[k+1] = b_othi[k] + rowlen[j];
4694         len += rowlen[j]; k++;
4695       }
4696       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4697     }
4698 
4699     /* allocate space for j and a arrrays of B_oth */
4700     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4701     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4702 
4703     /* j-array */
4704     /*---------*/
4705     /*  post receives of j-array */
4706     for (i=0; i<nrecvs; i++){
4707       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4708       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4709     }
4710 
4711     /* pack the outgoing message j-array */
4712     k = 0;
4713     for (i=0; i<nsends; i++){
4714       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4715       bufJ = bufj+sstartsj[i];
4716       for (j=0; j<nrows; j++) {
4717         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4718         for (ll=0; ll<sbs; ll++){
4719           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4720           for (l=0; l<ncols; l++){
4721             *bufJ++ = cols[l];
4722           }
4723           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4724         }
4725       }
4726       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4727     }
4728 
4729     /* recvs and sends of j-array are completed */
4730     i = nrecvs;
4731     while (i--) {
4732       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4733     }
4734     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4735   } else if (scall == MAT_REUSE_MATRIX){
4736     sstartsj = *startsj;
4737     rstartsj = sstartsj + nsends +1;
4738     bufa     = *bufa_ptr;
4739     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4740     b_otha   = b_oth->a;
4741   } else {
4742     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4743   }
4744 
4745   /* a-array */
4746   /*---------*/
4747   /*  post receives of a-array */
4748   for (i=0; i<nrecvs; i++){
4749     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4750     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4751   }
4752 
4753   /* pack the outgoing message a-array */
4754   k = 0;
4755   for (i=0; i<nsends; i++){
4756     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4757     bufA = bufa+sstartsj[i];
4758     for (j=0; j<nrows; j++) {
4759       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4760       for (ll=0; ll<sbs; ll++){
4761         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4762         for (l=0; l<ncols; l++){
4763           *bufA++ = vals[l];
4764         }
4765         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4766       }
4767     }
4768     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4769   }
4770   /* recvs and sends of a-array are completed */
4771   i = nrecvs;
4772   while (i--) {
4773     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4774   }
4775   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4776   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4777 
4778   if (scall == MAT_INITIAL_MATRIX){
4779     /* put together the new matrix */
4780     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4781 
4782     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4783     /* Since these are PETSc arrays, change flags to free them as necessary. */
4784     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4785     b_oth->free_a  = PETSC_TRUE;
4786     b_oth->free_ij = PETSC_TRUE;
4787     b_oth->nonew   = 0;
4788 
4789     ierr = PetscFree(bufj);CHKERRQ(ierr);
4790     if (!startsj || !bufa_ptr){
4791       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4792       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4793     } else {
4794       *startsj  = sstartsj;
4795       *bufa_ptr = bufa;
4796     }
4797   }
4798   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4799   PetscFunctionReturn(0);
4800 }
4801 
4802 #undef __FUNCT__
4803 #define __FUNCT__ "MatGetCommunicationStructs"
4804 /*@C
4805   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4806 
4807   Not Collective
4808 
4809   Input Parameters:
4810 . A - The matrix in mpiaij format
4811 
4812   Output Parameter:
4813 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4814 . colmap - A map from global column index to local index into lvec
4815 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4816 
4817   Level: developer
4818 
4819 @*/
4820 #if defined (PETSC_USE_CTABLE)
4821 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4822 #else
4823 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4824 #endif
4825 {
4826   Mat_MPIAIJ *a;
4827 
4828   PetscFunctionBegin;
4829   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4830   PetscValidPointer(lvec, 2)
4831   PetscValidPointer(colmap, 3)
4832   PetscValidPointer(multScatter, 4)
4833   a = (Mat_MPIAIJ *) A->data;
4834   if (lvec) *lvec = a->lvec;
4835   if (colmap) *colmap = a->colmap;
4836   if (multScatter) *multScatter = a->Mvctx;
4837   PetscFunctionReturn(0);
4838 }
4839 
4840 EXTERN_C_BEGIN
4841 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4842 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4843 EXTERN_C_END
4844 
4845 #include "../src/mat/impls/dense/mpi/mpidense.h"
4846 
4847 #undef __FUNCT__
4848 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4849 /*
4850     Computes (B'*A')' since computing B*A directly is untenable
4851 
4852                n                       p                          p
4853         (              )       (              )         (                  )
4854       m (      A       )  *  n (       B      )   =   m (         C        )
4855         (              )       (              )         (                  )
4856 
4857 */
4858 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4859 {
4860   PetscErrorCode     ierr;
4861   Mat                At,Bt,Ct;
4862 
4863   PetscFunctionBegin;
4864   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4865   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4866   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4867   ierr = MatDestroy(At);CHKERRQ(ierr);
4868   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4869   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4870   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4871   PetscFunctionReturn(0);
4872 }
4873 
4874 #undef __FUNCT__
4875 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4876 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4877 {
4878   PetscErrorCode ierr;
4879   PetscInt       m=A->rmap->n,n=B->cmap->n;
4880   Mat            Cmat;
4881 
4882   PetscFunctionBegin;
4883   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4884   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4885   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4886   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4887   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4888   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4889   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4890   *C   = Cmat;
4891   PetscFunctionReturn(0);
4892 }
4893 
4894 /* ----------------------------------------------------------------*/
4895 #undef __FUNCT__
4896 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4897 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4898 {
4899   PetscErrorCode ierr;
4900 
4901   PetscFunctionBegin;
4902   if (scall == MAT_INITIAL_MATRIX){
4903     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4904   }
4905   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4906   PetscFunctionReturn(0);
4907 }
4908 
4909 EXTERN_C_BEGIN
4910 #if defined(PETSC_HAVE_MUMPS)
4911 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4912 #endif
4913 #if defined(PETSC_HAVE_PASTIX)
4914 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4915 #endif
4916 #if defined(PETSC_HAVE_SUPERLU_DIST)
4917 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4918 #endif
4919 #if defined(PETSC_HAVE_SPOOLES)
4920 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4921 #endif
4922 EXTERN_C_END
4923 
4924 /*MC
4925    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4926 
4927    Options Database Keys:
4928 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4929 
4930   Level: beginner
4931 
4932 .seealso: MatCreateMPIAIJ()
4933 M*/
4934 
4935 EXTERN_C_BEGIN
4936 #undef __FUNCT__
4937 #define __FUNCT__ "MatCreate_MPIAIJ"
4938 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4939 {
4940   Mat_MPIAIJ     *b;
4941   PetscErrorCode ierr;
4942   PetscMPIInt    size;
4943 
4944   PetscFunctionBegin;
4945   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4946 
4947   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4948   B->data         = (void*)b;
4949   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4950   B->rmap->bs      = 1;
4951   B->assembled    = PETSC_FALSE;
4952   B->mapping      = 0;
4953 
4954   B->insertmode      = NOT_SET_VALUES;
4955   b->size            = size;
4956   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4957 
4958   /* build cache for off array entries formed */
4959   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4960   b->donotstash  = PETSC_FALSE;
4961   b->colmap      = 0;
4962   b->garray      = 0;
4963   b->roworiented = PETSC_TRUE;
4964 
4965   /* stuff used for matrix vector multiply */
4966   b->lvec      = PETSC_NULL;
4967   b->Mvctx     = PETSC_NULL;
4968 
4969   /* stuff for MatGetRow() */
4970   b->rowindices   = 0;
4971   b->rowvalues    = 0;
4972   b->getrowactive = PETSC_FALSE;
4973 
4974 #if defined(PETSC_HAVE_SPOOLES)
4975   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
4976                                      "MatGetFactor_mpiaij_spooles",
4977                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
4978 #endif
4979 #if defined(PETSC_HAVE_MUMPS)
4980   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
4981                                      "MatGetFactor_mpiaij_mumps",
4982                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
4983 #endif
4984 #if defined(PETSC_HAVE_PASTIX)
4985   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
4986 					   "MatGetFactor_mpiaij_pastix",
4987 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
4988 #endif
4989 #if defined(PETSC_HAVE_SUPERLU_DIST)
4990   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
4991                                      "MatGetFactor_mpiaij_superlu_dist",
4992                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
4993 #endif
4994   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4995                                      "MatStoreValues_MPIAIJ",
4996                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4997   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4998                                      "MatRetrieveValues_MPIAIJ",
4999                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5000   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5001 				     "MatGetDiagonalBlock_MPIAIJ",
5002                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5003   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5004 				     "MatIsTranspose_MPIAIJ",
5005 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5006   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5007 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5008 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5009   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5010 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5011 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5012   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5013 				     "MatDiagonalScaleLocal_MPIAIJ",
5014 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5015   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5016                                      "MatConvert_MPIAIJ_MPICSRPERM",
5017                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5018   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5019                                      "MatConvert_MPIAIJ_MPICRL",
5020                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5021   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5022                                      "MatMatMult_MPIDense_MPIAIJ",
5023                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5024   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5025                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5026                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5027   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5028                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5029                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5030   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5031   PetscFunctionReturn(0);
5032 }
5033 EXTERN_C_END
5034 
5035 #undef __FUNCT__
5036 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5037 /*@
5038      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5039          and "off-diagonal" part of the matrix in CSR format.
5040 
5041    Collective on MPI_Comm
5042 
5043    Input Parameters:
5044 +  comm - MPI communicator
5045 .  m - number of local rows (Cannot be PETSC_DECIDE)
5046 .  n - This value should be the same as the local size used in creating the
5047        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5048        calculated if N is given) For square matrices n is almost always m.
5049 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5050 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5051 .   i - row indices for "diagonal" portion of matrix
5052 .   j - column indices
5053 .   a - matrix values
5054 .   oi - row indices for "off-diagonal" portion of matrix
5055 .   oj - column indices
5056 -   oa - matrix values
5057 
5058    Output Parameter:
5059 .   mat - the matrix
5060 
5061    Level: advanced
5062 
5063    Notes:
5064        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5065 
5066        The i and j indices are 0 based
5067 
5068        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5069 
5070 
5071 .keywords: matrix, aij, compressed row, sparse, parallel
5072 
5073 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5074           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5075 @*/
5076 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5077 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5078 {
5079   PetscErrorCode ierr;
5080   Mat_MPIAIJ     *maij;
5081 
5082  PetscFunctionBegin;
5083   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5084   if (i[0]) {
5085     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5086   }
5087   if (oi[0]) {
5088     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5089   }
5090   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5091   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5092   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5093   maij = (Mat_MPIAIJ*) (*mat)->data;
5094   maij->donotstash     = PETSC_TRUE;
5095   (*mat)->preallocated = PETSC_TRUE;
5096 
5097   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5098   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5099   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5100   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5101 
5102   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5103   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5104 
5105   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5106   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5107   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5108   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5109 
5110   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5111   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5112   PetscFunctionReturn(0);
5113 }
5114 
5115 /*
5116     Special version for direct calls from Fortran
5117 */
5118 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5119 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5120 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5121 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5122 #endif
5123 
5124 /* Change these macros so can be used in void function */
5125 #undef CHKERRQ
5126 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5127 #undef SETERRQ2
5128 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5129 #undef SETERRQ
5130 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5131 
5132 EXTERN_C_BEGIN
5133 #undef __FUNCT__
5134 #define __FUNCT__ "matsetvaluesmpiaij_"
5135 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5136 {
5137   Mat             mat = *mmat;
5138   PetscInt        m = *mm, n = *mn;
5139   InsertMode      addv = *maddv;
5140   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5141   PetscScalar     value;
5142   PetscErrorCode  ierr;
5143 
5144   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5145   if (mat->insertmode == NOT_SET_VALUES) {
5146     mat->insertmode = addv;
5147   }
5148 #if defined(PETSC_USE_DEBUG)
5149   else if (mat->insertmode != addv) {
5150     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5151   }
5152 #endif
5153   {
5154   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5155   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5156   PetscTruth      roworiented = aij->roworiented;
5157 
5158   /* Some Variables required in the macro */
5159   Mat             A = aij->A;
5160   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5161   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5162   MatScalar       *aa = a->a;
5163   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5164   Mat             B = aij->B;
5165   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5166   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5167   MatScalar       *ba = b->a;
5168 
5169   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5170   PetscInt        nonew = a->nonew;
5171   MatScalar       *ap1,*ap2;
5172 
5173   PetscFunctionBegin;
5174   for (i=0; i<m; i++) {
5175     if (im[i] < 0) continue;
5176 #if defined(PETSC_USE_DEBUG)
5177     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5178 #endif
5179     if (im[i] >= rstart && im[i] < rend) {
5180       row      = im[i] - rstart;
5181       lastcol1 = -1;
5182       rp1      = aj + ai[row];
5183       ap1      = aa + ai[row];
5184       rmax1    = aimax[row];
5185       nrow1    = ailen[row];
5186       low1     = 0;
5187       high1    = nrow1;
5188       lastcol2 = -1;
5189       rp2      = bj + bi[row];
5190       ap2      = ba + bi[row];
5191       rmax2    = bimax[row];
5192       nrow2    = bilen[row];
5193       low2     = 0;
5194       high2    = nrow2;
5195 
5196       for (j=0; j<n; j++) {
5197         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5198         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5199         if (in[j] >= cstart && in[j] < cend){
5200           col = in[j] - cstart;
5201           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5202         } else if (in[j] < 0) continue;
5203 #if defined(PETSC_USE_DEBUG)
5204         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5205 #endif
5206         else {
5207           if (mat->was_assembled) {
5208             if (!aij->colmap) {
5209               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5210             }
5211 #if defined (PETSC_USE_CTABLE)
5212             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5213 	    col--;
5214 #else
5215             col = aij->colmap[in[j]] - 1;
5216 #endif
5217             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5218               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5219               col =  in[j];
5220               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5221               B = aij->B;
5222               b = (Mat_SeqAIJ*)B->data;
5223               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5224               rp2      = bj + bi[row];
5225               ap2      = ba + bi[row];
5226               rmax2    = bimax[row];
5227               nrow2    = bilen[row];
5228               low2     = 0;
5229               high2    = nrow2;
5230               bm       = aij->B->rmap->n;
5231               ba = b->a;
5232             }
5233           } else col = in[j];
5234           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5235         }
5236       }
5237     } else {
5238       if (!aij->donotstash) {
5239         if (roworiented) {
5240           if (ignorezeroentries && v[i*n] == 0.0) continue;
5241           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5242         } else {
5243           if (ignorezeroentries && v[i] == 0.0) continue;
5244           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5245         }
5246       }
5247     }
5248   }}
5249   PetscFunctionReturnVoid();
5250 }
5251 EXTERN_C_END
5252 
5253