xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3fa76a5b188a02dd677974deca162194da798f66)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
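/*
    Example usage (an illustrative sketch, not code from this file; M, nnz, m and
    rank are assumed to be set up by the caller): build a sequential matrix on
    process 0 and spread it across PETSC_COMM_WORLD, then later refresh only the
    numerical values while keeping the same nonzero pattern:

       Mat gmat = PETSC_NULL,dist;
       if (!rank) {
         ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,M,0,nnz,&gmat);CHKERRQ(ierr);
         ... insert entries in gmat and assemble it ...
       }
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
       ... change the numerical values in gmat on process 0 ...
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/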
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the diagonal and off-diagonal nonzero counts for each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the diagonal and off-diagonal nonzero counts for each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash-table lookup cost; without it, it is not scalable (each
181 processor holds an order-N integer array, though access is fast).
182 */
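/*
    For example (illustrative, using the names defined in this file): once the map
    exists, a global column index g belonging to the off-diagonal part is translated
    to its local index lcol with

       #if defined (PETSC_USE_CTABLE)
         ierr = PetscTableFind(aij->colmap,g+1,&lcol);CHKERRQ(ierr);
         lcol--;
       #else
         lcol = aij->colmap[g] - 1;
       #endif

    exactly as done in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below; the
    entries are stored shifted by one so that lcol < 0 signals that g is absent
    from the off-diagonal part.
*/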
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
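/*
   Both macros above use the same technique: a binary search that narrows [low,high)
   to at most five candidates, a short linear scan, and, when the column is absent,
   an insertion that shifts the tail of the row up one slot. A minimal standalone
   sketch of that logic (a hypothetical helper, not used in this file, which assumes
   the caller has already guaranteed space for one more entry):

       static void InsertIntoSortedRow(PetscInt *cols,MatScalar *vals,PetscInt *nrow,
                                       PetscInt col,MatScalar value)
       {
         PetscInt low = 0,high = *nrow,t,i;
         while (high-low > 5) {            // binary search: narrow [low,high)
           t = (low+high)/2;
           if (cols[t] > col) high = t;
           else               low  = t;
         }
         for (i=low; i<high; i++) {        // linear scan over the few survivors
           if (cols[i] >= col) break;
         }
         if (i < *nrow && cols[i] == col) {vals[i] = value; return;}  // overwrite
         for (t=(*nrow)++; t>i; t--) {     // shift the tail of the row up one slot
           cols[t] = cols[t-1];
           vals[t] = vals[t-1];
         }
         cols[i] = col; vals[i] = value;   // insert the new entry
       }

   The real macros additionally honor ADD_VALUES, the ignorezeroentries and nonew
   options, and grow the row with MatSeqXAIJReallocateAIJ() when it is full.
*/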
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* count the entries in this row that lie to the left of the diagonal block */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
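/*
   MatSetValues_MPIAIJ() routes each entry three ways: values in locally owned rows
   with columns in [cstart,cend) go into the diagonal block aij->A, values in locally
   owned rows with any other column go into the off-diagonal block aij->B (both via
   the macros above), and values destined for rows owned by other processes are
   copied into mat->stash, to be communicated during MatAssemblyBegin/End().
*/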
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some Variables required in the macro */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
415   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled; if so, we must
517      also disassemble ourselves so that we may reassemble. */
518   /*
519      if the nonzero structure of submatrix B cannot change, then we know that
520      no processor disassembled and we can skip this step
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_USE_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /* first count the number of rows to be sent to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_USE_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_USE_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of the number of messages and the maximum length */
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for the rows going to
617          the ith processor
618   */
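  /*
      For example (purely illustrative numbers): with size = 3 and per-process row
      counts nprocs[0],nprocs[2],nprocs[4] = 2,0,3 the loop below produces
      starts = {0,2,2}, so the two rows owned by process 0 are packed into
      svalues[0..1] and the three rows owned by process 2 into svalues[2..4].
  */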
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal, we use separate
673      code so that MatSetValues() is not called for each diagonal entry,
674      which would allocate new memory, triggering many mallocs and slowing things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
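/*
   Computes yy = A^T xx as A_d^T xx plus a reverse scatter-add of B^T xx: the
   off-diagonal block is applied first into the work vector lvec, those
   contributions are scattered (with ADD_VALUES) back to their owning processes,
   and the local product A_d^T xx is overlapped with that communication whenever
   the scatter is not merged.
*/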
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts; note this assumes the values are not actually
764        added into yy until the VecScatterEnd() below */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* the values were actually received in the Begin(), but we must still call the matching (no-op) End() */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
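/*
   Writes the matrix in the PETSc binary format, gathered to process 0 in rank
   order: a header of four PetscInts (MAT_FILE_COOKIE, number of global rows,
   number of global columns, total number of nonzeros), then all row lengths,
   then all column indices in global numbering (each row ordered by increasing
   global column), then all numerical values in the same order.
*/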
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap->N;
932     header[2] = mat->cmap->N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap->n;
938     range = mat->rmap->range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap->n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap->n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
968   nzmax = nz; /* 0th processor needs space as large as the largest any processor needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap->n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap->n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap->N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");
1114 
1115     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1116     if (!rank) {
1117       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1118     } else {
1119       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1120     }
1121     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1122     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1123     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1124     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1125 
1126     /* copy over the A part */
1127     Aloc = (Mat_SeqAIJ*)aij->A->data;
1128     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1129     row = mat->rmap->rstart;
1130     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1131     for (i=0; i<m; i++) {
1132       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1133       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1134     }
1135     aj = Aloc->j;
1136     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1137 
1138     /* copy over the B part */
1139     Aloc = (Mat_SeqAIJ*)aij->B->data;
1140     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1141     row  = mat->rmap->rstart;
1142     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1143     ct   = cols;
1144     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1148     }
1149     ierr = PetscFree(ct);CHKERRQ(ierr);
1150     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1151     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1152     /*
1153        Everyone has to call to draw the matrix since the graphics waits are
1154        synchronized across all processors that share the PetscDraw object
1155     */
1156     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1157     if (!rank) {
1158       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1159       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1160     }
1161     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1162     ierr = MatDestroy(A);CHKERRQ(ierr);
1163   }
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatView_MPIAIJ"
1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1170 {
1171   PetscErrorCode ierr;
1172   PetscTruth     iascii,isdraw,issocket,isbinary;
1173 
1174   PetscFunctionBegin;
1175   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1176   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1177   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1178   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1179   if (iascii || isdraw || isbinary || issocket) {
1180     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1181   } else {
1182     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
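/*
   Implements the "local" SOR variants: each outer iteration gathers the
   off-process part of xx into lvec, forms the updated right-hand side
   bb1 = bb - B*lvec, and then applies the requested sequential SOR sweep to the
   diagonal block, i.e. xx <- SOR(A_d,bb1); the off-diagonal coupling is thus
   handled in block-Jacobi fashion between sweeps.
*/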
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatRelax_MPIAIJ"
1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1190 {
1191   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1192   PetscErrorCode ierr;
1193   Vec            bb1;
1194 
1195   PetscFunctionBegin;
1196   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1197 
1198   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1199     if (flag & SOR_ZERO_INITIAL_GUESS) {
1200       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1201       its--;
1202     }
1203 
1204     while (its--) {
1205       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1207 
1208       /* update rhs: bb1 = bb - B*x */
1209       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1210       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1211 
1212       /* local sweep */
1213       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1214     }
1215   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1216     if (flag & SOR_ZERO_INITIAL_GUESS) {
1217       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1218       its--;
1219     }
1220     while (its--) {
1221       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1222       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223 
1224       /* update rhs: bb1 = bb - B*x */
1225       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1226       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1227 
1228       /* local sweep */
1229       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1230     }
1231   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1232     if (flag & SOR_ZERO_INITIAL_GUESS) {
1233       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1234       its--;
1235     }
1236     while (its--) {
1237       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239 
1240       /* update rhs: bb1 = bb - B*x */
1241       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1242       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1243 
1244       /* local sweep */
1245       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1246     }
1247   } else {
1248     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1249   }
1250 
1251   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatPermute_MPIAIJ"
1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1258 {
1259   MPI_Comm       comm,pcomm;
1260   PetscInt       first,local_size,nrows;
1261   const PetscInt *rows;
1262   int            ntids;
1263   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1264   PetscErrorCode ierr;
1265 
1266   PetscFunctionBegin;
1267   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1268   /* make a collective version of 'rowp' */
1269   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1270   if (pcomm==comm) {
1271     crowp = rowp;
1272   } else {
1273     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1274     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1275     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1276     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1277   }
1278   /* collect the global row permutation and invert it */
1279   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1280   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1281   if (pcomm!=comm) {
1282     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1283   }
1284   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1285   /* get the local target indices */
1286   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1287   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1288   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1289   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1290   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1291   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1292   /* the column permutation is so much easier;
1293      make a local version of 'colp' and invert it */
1294   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1295   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1296   if (ntids==1) {
1297     lcolp = colp;
1298   } else {
1299     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1300     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1301     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1302   }
1303   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1304   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1305   if (ntids>1) {
1306     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1307     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1308   }
1309   /* now we just get the submatrix */
1310   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1311   /* clean up */
1312   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1313   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
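/*
    Illustrative only: a sketch of calling the permutation above.  It assumes
    a square, assembled MPIAIJ matrix A; each process supplies its local rows
    of the permutation in global numbering (the identity here, for brevity).
    The helper name is hypothetical.
*/
#if 0
static PetscErrorCode PermuteSketch(Mat A,Mat *B)
{
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend,*idx;
  IS             rowp,colp;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = PetscMalloc((rend-rstart)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) idx[i-rstart] = i;   /* identity permutation */
  ierr = ISCreateGeneral(((PetscObject)A)->comm,rend-rstart,idx,&rowp);CHKERRQ(ierr);
  ierr = ISSetPermutation(rowp);CHKERRQ(ierr);
  ierr = ISCreateGeneral(((PetscObject)A)->comm,rend-rstart,idx,&colp);CHKERRQ(ierr);
  ierr = ISSetPermutation(colp);CHKERRQ(ierr);
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,B);CHKERRQ(ierr);
  ierr = ISDestroy(rowp);CHKERRQ(ierr);
  ierr = ISDestroy(colp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
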
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1319 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1320 {
1321   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1322   Mat            A = mat->A,B = mat->B;
1323   PetscErrorCode ierr;
1324   PetscReal      isend[5],irecv[5];
1325 
1326   PetscFunctionBegin;
1327   info->block_size     = 1.0;
1328   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1329   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1330   isend[3] = info->memory;  isend[4] = info->mallocs;
1331   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1332   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1333   isend[3] += info->memory;  isend[4] += info->mallocs;
1334   if (flag == MAT_LOCAL) {
1335     info->nz_used      = isend[0];
1336     info->nz_allocated = isend[1];
1337     info->nz_unneeded  = isend[2];
1338     info->memory       = isend[3];
1339     info->mallocs      = isend[4];
1340   } else if (flag == MAT_GLOBAL_MAX) {
1341     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1342     info->nz_used      = irecv[0];
1343     info->nz_allocated = irecv[1];
1344     info->nz_unneeded  = irecv[2];
1345     info->memory       = irecv[3];
1346     info->mallocs      = irecv[4];
1347   } else if (flag == MAT_GLOBAL_SUM) {
1348     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1349     info->nz_used      = irecv[0];
1350     info->nz_allocated = irecv[1];
1351     info->nz_unneeded  = irecv[2];
1352     info->memory       = irecv[3];
1353     info->mallocs      = irecv[4];
1354   }
1355   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1356   info->fill_ratio_needed = 0;
1357   info->factor_mallocs    = 0;
1358 
1359   PetscFunctionReturn(0);
1360 }
1361 
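/*
    Illustrative only: querying the statistics gathered above.  MAT_LOCAL
    reports the on-process counts (diagonal plus off-diagonal block), while
    MAT_GLOBAL_SUM and MAT_GLOBAL_MAX reduce the same five fields over the
    communicator.  The helper name is hypothetical.
*/
#if 0
static PetscErrorCode InfoSketch(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
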
1362 #undef __FUNCT__
1363 #define __FUNCT__ "MatSetOption_MPIAIJ"
1364 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1365 {
1366   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1367   PetscErrorCode ierr;
1368 
1369   PetscFunctionBegin;
1370   switch (op) {
1371   case MAT_NEW_NONZERO_LOCATIONS:
1372   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1373   case MAT_KEEP_ZEROED_ROWS:
1374   case MAT_NEW_NONZERO_LOCATION_ERR:
1375   case MAT_USE_INODES:
1376   case MAT_IGNORE_ZERO_ENTRIES:
1377     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1378     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1379     break;
1380   case MAT_ROW_ORIENTED:
1381     a->roworiented = flg;
1382     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1383     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1384     break;
1385   case MAT_NEW_DIAGONALS:
1386     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1387     break;
1388   case MAT_IGNORE_OFF_PROC_ENTRIES:
1389     a->donotstash = PETSC_TRUE;
1390     break;
1391   case MAT_SYMMETRIC:
1392     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1393     break;
1394   case MAT_STRUCTURALLY_SYMMETRIC:
1395   case MAT_HERMITIAN:
1396   case MAT_SYMMETRY_ETERNAL:
1397     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1398     break;
1399   default:
1400     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatGetRow_MPIAIJ"
1407 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1408 {
1409   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1410   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1411   PetscErrorCode ierr;
1412   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1413   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1414   PetscInt       *cmap,*idx_p;
1415 
1416   PetscFunctionBegin;
1417   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1418   mat->getrowactive = PETSC_TRUE;
1419 
1420   if (!mat->rowvalues && (idx || v)) {
1421     /*
1422         allocate enough space to hold information from the longest row.
1423     */
1424     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1425     PetscInt     max = 1,tmp;
1426     for (i=0; i<matin->rmap->n; i++) {
1427       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1428       if (max < tmp) { max = tmp; }
1429     }
1430     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1431     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1432   }
1433 
1434   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1435   lrow = row - rstart;
1436 
1437   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1438   if (!v)   {pvA = 0; pvB = 0;}
1439   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1440   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1441   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1442   nztot = nzA + nzB;
1443 
1444   cmap  = mat->garray;
1445   if (v  || idx) {
1446     if (nztot) {
1447       /* Sort by increasing column numbers, assuming A and B already sorted */
1448       PetscInt imark = -1;
1449       if (v) {
1450         *v = v_p = mat->rowvalues;
1451         for (i=0; i<nzB; i++) {
1452           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1453           else break;
1454         }
1455         imark = i;
1456         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1457         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1458       }
1459       if (idx) {
1460         *idx = idx_p = mat->rowindices;
1461         if (imark > -1) {
1462           for (i=0; i<imark; i++) {
1463             idx_p[i] = cmap[cworkB[i]];
1464           }
1465         } else {
1466           for (i=0; i<nzB; i++) {
1467             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1468             else break;
1469           }
1470           imark = i;
1471         }
1472         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1473         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1474       }
1475     } else {
1476       if (idx) *idx = 0;
1477       if (v)   *v   = 0;
1478     }
1479   }
1480   *nz = nztot;
1481   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1482   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1483   PetscFunctionReturn(0);
1484 }
1485 
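/*
    Illustrative only: the merge above hands back one locally owned row at a
    time, with global column indices in increasing order.  A typical traversal
    restores each row before requesting the next; the helper name is
    hypothetical.
*/
#if 0
static PetscErrorCode RowTraversalSketch(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... use ncols, cols[], vals[] ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
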
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1488 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1489 {
1490   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1491 
1492   PetscFunctionBegin;
1493   if (!aij->getrowactive) {
1494     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1495   }
1496   aij->getrowactive = PETSC_FALSE;
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatNorm_MPIAIJ"
1502 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1503 {
1504   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1505   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1506   PetscErrorCode ierr;
1507   PetscInt       i,j,cstart = mat->cmap->rstart;
1508   PetscReal      sum = 0.0;
1509   MatScalar      *v;
1510 
1511   PetscFunctionBegin;
1512   if (aij->size == 1) {
1513     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1514   } else {
1515     if (type == NORM_FROBENIUS) {
1516       v = amat->a;
1517       for (i=0; i<amat->nz; i++) {
1518 #if defined(PETSC_USE_COMPLEX)
1519         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1520 #else
1521         sum += (*v)*(*v); v++;
1522 #endif
1523       }
1524       v = bmat->a;
1525       for (i=0; i<bmat->nz; i++) {
1526 #if defined(PETSC_USE_COMPLEX)
1527         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1528 #else
1529         sum += (*v)*(*v); v++;
1530 #endif
1531       }
1532       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1533       *norm = sqrt(*norm);
1534     } else if (type == NORM_1) { /* max column norm */
1535       PetscReal *tmp,*tmp2;
1536       PetscInt  *jj,*garray = aij->garray;
1537       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1538       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1539       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1540       *norm = 0.0;
1541       v = amat->a; jj = amat->j;
1542       for (j=0; j<amat->nz; j++) {
1543         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1544       }
1545       v = bmat->a; jj = bmat->j;
1546       for (j=0; j<bmat->nz; j++) {
1547         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1548       }
1549       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1550       for (j=0; j<mat->cmap->N; j++) {
1551         if (tmp2[j] > *norm) *norm = tmp2[j];
1552       }
1553       ierr = PetscFree(tmp);CHKERRQ(ierr);
1554       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1555     } else if (type == NORM_INFINITY) { /* max row norm */
1556       PetscReal ntemp = 0.0;
1557       for (j=0; j<aij->A->rmap->n; j++) {
1558         v = amat->a + amat->i[j];
1559         sum = 0.0;
1560         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1561           sum += PetscAbsScalar(*v); v++;
1562         }
1563         v = bmat->a + bmat->i[j];
1564         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1565           sum += PetscAbsScalar(*v); v++;
1566         }
1567         if (sum > ntemp) ntemp = sum;
1568       }
1569       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1570     } else {
1571       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1572     }
1573   }
1574   PetscFunctionReturn(0);
1575 }
1576 
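/*
    Illustrative only: each supported norm above costs a single MPI_Allreduce
    -- a sum of squares for NORM_FROBENIUS, per-column sums for NORM_1, and
    per-row sums combined with MPI_MAX for NORM_INFINITY.  The two norm is
    unsupported here since it would require an eigenvalue-type computation.
*/
#if 0
static PetscErrorCode NormSketch(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      fro,one,inf;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_FROBENIUS,&fro);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_1,&one);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_INFINITY,&inf);CHKERRQ(ierr);  /* NORM_2 would raise PETSC_ERR_SUP */
  PetscFunctionReturn(0);
}
#endif
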
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatTranspose_MPIAIJ"
1579 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1580 {
1581   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1582   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1583   PetscErrorCode ierr;
1584   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1585   PetscInt       cstart=A->cmap->rstart,ncol;
1586   Mat            B;
1587   MatScalar      *array;
1588 
1589   PetscFunctionBegin;
1590   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1591 
1592   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1593   ai = Aloc->i; aj = Aloc->j;
1594   bi = Bloc->i; bj = Bloc->j;
1595   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1596     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1597     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1598     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1599     for (i=0; i<ai[ma]; i++){
1600       d_nnz[aj[i]] ++;
1601       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1602     }
1603 
1604     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1605     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1606     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1607     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1608     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1609   } else {
1610     B = *matout;
1611   }
1612 
1613   /* copy over the A part */
1614   array = Aloc->a;
1615   row = A->rmap->rstart;
1616   for (i=0; i<ma; i++) {
1617     ncol = ai[i+1]-ai[i];
1618     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1619     row++; array += ncol; aj += ncol;
1620   }
1621   aj = Aloc->j;
1622   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1623 
1624   /* copy over the B part */
1625   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1626   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1627   array = Bloc->a;
1628   row = A->rmap->rstart;
1629   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1630   cols_tmp = cols;
1631   for (i=0; i<mb; i++) {
1632     ncol = bi[i+1]-bi[i];
1633     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1634     row++; array += ncol; cols_tmp += ncol;
1635   }
1636   ierr = PetscFree(cols);CHKERRQ(ierr);
1637 
1638   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1639   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1640   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1641     *matout = B;
1642   } else {
1643     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1644   }
1645   PetscFunctionReturn(0);
1646 }
1647 
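/*
    Illustrative only: the transpose above either builds a fresh matrix
    (MAT_INITIAL_MATRIX) or, for a square matrix transposed in place, swaps
    A's contents via MatHeaderCopy().  The helper name is hypothetical.
*/
#if 0
static PetscErrorCode TransposeSketch(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);  /* new matrix */
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);     /* in place; square matrices only */
  PetscFunctionReturn(0);
}
#endif
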
1648 #undef __FUNCT__
1649 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1650 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1651 {
1652   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1653   Mat            a = aij->A,b = aij->B;
1654   PetscErrorCode ierr;
1655   PetscInt       s1,s2,s3;
1656 
1657   PetscFunctionBegin;
1658   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1659   if (rr) {
1660     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1661     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1662     /* Overlap communication with computation. */
1663     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664   }
1665   if (ll) {
1666     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1667     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1668     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1669   }
1670   /* scale the diagonal block */
1671   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1672 
1673   if (rr) {
1674     /* Do a scatter end and then right scale the off-diagonal block */
1675     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1676     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1677   }
1678 
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1684 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1685 {
1686   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1687   PetscErrorCode ierr;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1691   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1692   PetscFunctionReturn(0);
1693 }
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1696 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1697 {
1698   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1699   PetscErrorCode ierr;
1700 
1701   PetscFunctionBegin;
1702   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatEqual_MPIAIJ"
1708 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1709 {
1710   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1711   Mat            a,b,c,d;
1712   PetscTruth     flg;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   a = matA->A; b = matA->B;
1717   c = matB->A; d = matB->B;
1718 
1719   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1720   if (flg) {
1721     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1722   }
1723   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatCopy_MPIAIJ"
1729 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1730 {
1731   PetscErrorCode ierr;
1732   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1733   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1734 
1735   PetscFunctionBegin;
1736   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1737   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1738     /* Because of the column compression in the off-processor part of the matrix a->B,
1739        the number of columns in a->B and b->B may be different, hence we cannot call
1740        MatCopy() directly on the two parts. If need be, a copy more efficient than
1741        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1742        and then copying the submatrices */
1743     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1744   } else {
1745     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1746     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1747   }
1748   PetscFunctionReturn(0);
1749 }
1750 
1751 #undef __FUNCT__
1752 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1753 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1754 {
1755   PetscErrorCode ierr;
1756 
1757   PetscFunctionBegin;
1758   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 #include "petscblaslapack.h"
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatAXPY_MPIAIJ"
1765 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1766 {
1767   PetscErrorCode ierr;
1768   PetscInt       i;
1769   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1770   PetscBLASInt   bnz,one=1;
1771   Mat_SeqAIJ     *x,*y;
1772 
1773   PetscFunctionBegin;
1774   if (str == SAME_NONZERO_PATTERN) {
1775     PetscScalar alpha = a;
1776     x = (Mat_SeqAIJ *)xx->A->data;
1777     y = (Mat_SeqAIJ *)yy->A->data;
1778     bnz = PetscBLASIntCast(x->nz);
1779     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1780     x = (Mat_SeqAIJ *)xx->B->data;
1781     y = (Mat_SeqAIJ *)yy->B->data;
1782     bnz = PetscBLASIntCast(x->nz);
1783     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1784   } else if (str == SUBSET_NONZERO_PATTERN) {
1785     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1786 
1787     x = (Mat_SeqAIJ *)xx->B->data;
1788     y = (Mat_SeqAIJ *)yy->B->data;
1789     if (y->xtoy && y->XtoY != xx->B) {
1790       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1791       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1792     }
1793     if (!y->xtoy) { /* get xtoy */
1794       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1795       y->XtoY = xx->B;
1796       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1797     }
1798     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1799   } else {
1800     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1801   }
1802   PetscFunctionReturn(0);
1803 }
1804 
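/*
    Illustrative only: Y = a*X + Y.  SAME_NONZERO_PATTERN lets the code above
    run a raw BLAS axpy over both stored blocks; SUBSET_NONZERO_PATTERN builds
    (and caches on y) a map from X's nonzeros into Y's; any other structure
    falls back to MatAXPY_Basic().
*/
#if 0
static PetscErrorCode AXPYSketch(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);  /* Y += 2*X */
  PetscFunctionReturn(0);
}
#endif
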
1805 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1806 
1807 #undef __FUNCT__
1808 #define __FUNCT__ "MatConjugate_MPIAIJ"
1809 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1810 {
1811 #if defined(PETSC_USE_COMPLEX)
1812   PetscErrorCode ierr;
1813   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1814 
1815   PetscFunctionBegin;
1816   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1817   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1818 #else
1819   PetscFunctionBegin;
1820 #endif
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 #undef __FUNCT__
1825 #define __FUNCT__ "MatRealPart_MPIAIJ"
1826 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1827 {
1828   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1829   PetscErrorCode ierr;
1830 
1831   PetscFunctionBegin;
1832   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1833   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1834   PetscFunctionReturn(0);
1835 }
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1839 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1840 {
1841   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1842   PetscErrorCode ierr;
1843 
1844   PetscFunctionBegin;
1845   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1846   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 #ifdef PETSC_HAVE_PBGL
1851 
1852 #include <boost/parallel/mpi/bsp_process_group.hpp>
1853 #include <boost/graph/distributed/ilu_default_graph.hpp>
1854 #include <boost/graph/distributed/ilu_0_block.hpp>
1855 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1856 #include <boost/graph/distributed/petsc/interface.hpp>
1857 #include <boost/multi_array.hpp>
1858 #include <boost/parallel/distributed_property_map.hpp>
1859 
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1862 /*
1863   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1864 */
1865 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1866 {
1867   namespace petsc = boost::distributed::petsc;
1868 
1869   namespace graph_dist = boost::graph::distributed;
1870   using boost::graph::distributed::ilu_default::process_group_type;
1871   using boost::graph::ilu_permuted;
1872 
1873   PetscTruth      row_identity, col_identity;
1874   PetscContainer  c;
1875   PetscInt        m, n, M, N;
1876   PetscErrorCode  ierr;
1877 
1878   PetscFunctionBegin;
1879   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1880   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1881   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1882   if (!row_identity || !col_identity) {
1883     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1884   }
1885 
1886   process_group_type pg;
1887   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1888   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1889   lgraph_type&   level_graph = *lgraph_p;
1890   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1891 
1892   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1893   ilu_permuted(level_graph);
1894 
1895   /* put together the new matrix */
1896   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1897   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1898   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1899   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1900   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1901   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1902   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1903 
1904   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1905   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1906   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1912 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1913 {
1914   PetscFunctionBegin;
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatSolve_MPIAIJ"
1920 /*
1921   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1922 */
1923 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1924 {
1925   namespace graph_dist = boost::graph::distributed;
1926 
1927   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1928   lgraph_type*   lgraph_p;
1929   PetscContainer c;
1930   PetscErrorCode ierr;
1931 
1932   PetscFunctionBegin;
1933   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1934   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1935   ierr = VecCopy(b, x); CHKERRQ(ierr);
1936 
1937   PetscScalar* array_x;
1938   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1939   PetscInt sx;
1940   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1941 
1942   PetscScalar* array_b;
1943   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1944   PetscInt sb;
1945   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1946 
1947   lgraph_type&   level_graph = *lgraph_p;
1948   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1949 
1950   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1951   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1952                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1953 
1954   typedef boost::iterator_property_map<array_ref_type::iterator,
1955                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1956   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1957                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1958 
1959   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1960 
1961   PetscFunctionReturn(0);
1962 }
1963 #endif
1964 
1965 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1966   PetscInt       nzlocal,nsends,nrecvs;
1967   PetscMPIInt    *send_rank;
1968   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1969   PetscScalar    *sbuf_a,**rbuf_a;
1970   PetscErrorCode (*MatDestroy)(Mat);
1971 } Mat_Redundant;
1972 
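/*
    Illustrative only: the struct above travels with the redundant matrix
    inside a PetscContainer, the standard way of hanging private data off a
    PetscObject so a later call (the MAT_REUSE_MATRIX branch below) can
    recover the communication buffers.  The generic pattern:
*/
#if 0
static PetscErrorCode AttachSketch(Mat C,Mat_Redundant *redund)
{
  PetscErrorCode ierr;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
  /* ... later, from any routine that can see C ... */
  ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
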
1973 #undef __FUNCT__
1974 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1975 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1976 {
1977   PetscErrorCode       ierr;
1978   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1979   PetscInt             i;
1980 
1981   PetscFunctionBegin;
1982   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1983   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1984   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1985   for (i=0; i<redund->nrecvs; i++){
1986     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1987     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1988   }
1989   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1990   ierr = PetscFree(redund);CHKERRQ(ierr);
1991   PetscFunctionReturn(0);
1992 }
1993 
1994 #undef __FUNCT__
1995 #define __FUNCT__ "MatDestroy_MatRedundant"
1996 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1997 {
1998   PetscErrorCode  ierr;
1999   PetscContainer  container;
2000   Mat_Redundant   *redund=PETSC_NULL;
2001 
2002   PetscFunctionBegin;
2003   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2004   if (container) {
2005     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2006   } else {
2007     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2008   }
2009   A->ops->destroy = redund->MatDestroy;
2010   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2011   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2012   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 #undef __FUNCT__
2017 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2018 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2019 {
2020   PetscMPIInt    rank,size;
2021   MPI_Comm       comm=((PetscObject)mat)->comm;
2022   PetscErrorCode ierr;
2023   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2024   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2025   PetscInt       *rowrange=mat->rmap->range;
2026   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2027   Mat            A=aij->A,B=aij->B,C=*matredundant;
2028   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2029   PetscScalar    *sbuf_a;
2030   PetscInt       nzlocal=a->nz+b->nz;
2031   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2032   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2033   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2034   MatScalar      *aworkA,*aworkB;
2035   PetscScalar    *vals;
2036   PetscMPIInt    tag1,tag2,tag3,imdex;
2037   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2038                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2039   MPI_Status     recv_status,*send_status;
2040   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2041   PetscInt       **rbuf_j=PETSC_NULL;
2042   PetscScalar    **rbuf_a=PETSC_NULL;
2043   Mat_Redundant  *redund=PETSC_NULL;
2044   PetscContainer container;
2045 
2046   PetscFunctionBegin;
2047   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2048   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2049 
2050   if (reuse == MAT_REUSE_MATRIX) {
2051     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2052     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2053     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2054     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2055     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2056     if (container) {
2057       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2058     } else {
2059       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2060     }
2061     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2062 
2063     nsends    = redund->nsends;
2064     nrecvs    = redund->nrecvs;
2065     send_rank = redund->send_rank; recv_rank = send_rank + size;
2066     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2067     sbuf_j    = redund->sbuf_j;
2068     sbuf_a    = redund->sbuf_a;
2069     rbuf_j    = redund->rbuf_j;
2070     rbuf_a    = redund->rbuf_a;
2071   }
2072 
2073   if (reuse == MAT_INITIAL_MATRIX){
2074     PetscMPIInt  subrank,subsize;
2075     PetscInt     nleftover,np_subcomm;
2076     /* get the destination processors' id send_rank, nsends and nrecvs */
2077     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2078     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2079     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2080     recv_rank = send_rank + size;
2081     np_subcomm = size/nsubcomm;
2082     nleftover  = size - nsubcomm*np_subcomm;
2083     nsends = 0; nrecvs = 0;
2084     for (i=0; i<size; i++){ /* i=rank*/
2085       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2086         send_rank[nsends] = i; nsends++;
2087         recv_rank[nrecvs++] = i;
2088       }
2089     }
2090     if (rank >= size - nleftover){/* this proc is a leftover processor */
2091       i = size-nleftover-1;
2092       j = 0;
2093       while (j < nsubcomm - nleftover){
2094         send_rank[nsends++] = i;
2095         i--; j++;
2096       }
2097     }
2098 
2099     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2100       for (i=0; i<nleftover; i++){
2101         recv_rank[nrecvs++] = size-nleftover+i;
2102       }
2103     }
2104 
2105     /* allocate sbuf_j, sbuf_a */
2106     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2107     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2108     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2109   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2110 
2111   /* copy mat's local entries into the buffers */
2112   if (reuse == MAT_INITIAL_MATRIX){
2113     rownz_max = 0;
2114     rptr = sbuf_j;
2115     cols = sbuf_j + rend-rstart + 1;
2116     vals = sbuf_a;
2117     rptr[0] = 0;
2118     for (i=0; i<rend-rstart; i++){
2119       row = i + rstart;
2120       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2121       ncols  = nzA + nzB;
2122       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2123       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2124       /* load the column indices for this row into cols */
2125       lwrite = 0;
2126       for (l=0; l<nzB; l++) {
2127         if ((ctmp = bmap[cworkB[l]]) < cstart){
2128           vals[lwrite]   = aworkB[l];
2129           cols[lwrite++] = ctmp;
2130         }
2131       }
2132       for (l=0; l<nzA; l++){
2133         vals[lwrite]   = aworkA[l];
2134         cols[lwrite++] = cstart + cworkA[l];
2135       }
2136       for (l=0; l<nzB; l++) {
2137         if ((ctmp = bmap[cworkB[l]]) >= cend){
2138           vals[lwrite]   = aworkB[l];
2139           cols[lwrite++] = ctmp;
2140         }
2141       }
2142       vals += ncols;
2143       cols += ncols;
2144       rptr[i+1] = rptr[i] + ncols;
2145       if (rownz_max < ncols) rownz_max = ncols;
2146     }
2147     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB,"rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2148   } else { /* only copy matrix values into sbuf_a */
2149     rptr = sbuf_j;
2150     vals = sbuf_a;
2151     rptr[0] = 0;
2152     for (i=0; i<rend-rstart; i++){
2153       row = i + rstart;
2154       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2155       ncols  = nzA + nzB;
2156       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2157       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2158       lwrite = 0;
2159       for (l=0; l<nzB; l++) {
2160         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2161       }
2162       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2163       for (l=0; l<nzB; l++) {
2164         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2165       }
2166       vals += ncols;
2167       rptr[i+1] = rptr[i] + ncols;
2168     }
2169   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2170 
2171   /* send nzlocal to others, and recv other's nzlocal */
2172   /*--------------------------------------------------*/
2173   if (reuse == MAT_INITIAL_MATRIX){
2174     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2175     s_waits2 = s_waits3 + nsends;
2176     s_waits1 = s_waits2 + nsends;
2177     r_waits1 = s_waits1 + nsends;
2178     r_waits2 = r_waits1 + nrecvs;
2179     r_waits3 = r_waits2 + nrecvs;
2180   } else {
2181     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2182     r_waits3 = s_waits3 + nsends;
2183   }
2184 
2185   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2186   if (reuse == MAT_INITIAL_MATRIX){
2187     /* get new tags to keep the communication clean */
2188     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2189     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2190     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2191     rbuf_nz = sbuf_nz + nsends;
2192 
2193     /* post receives of other's nzlocal */
2194     for (i=0; i<nrecvs; i++){
2195       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2196     }
2197     /* send nzlocal to others */
2198     for (i=0; i<nsends; i++){
2199       sbuf_nz[i] = nzlocal;
2200       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2201     }
2202     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2203     count = nrecvs;
2204     while (count) {
2205       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2206       recv_rank[imdex] = recv_status.MPI_SOURCE;
2207       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2208       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2209 
2210       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2211       rbuf_nz[imdex] += i + 2;
2212       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2213       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2214       count--;
2215     }
2216     /* wait on sends of nzlocal */
2217     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2218     /* send mat->i,j to others, and recv from other's */
2219     /*------------------------------------------------*/
2220     for (i=0; i<nsends; i++){
2221       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2222       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2223     }
2224     /* wait on receives of mat->i,j */
2225     /*------------------------------*/
2226     count = nrecvs;
2227     while (count) {
2228       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2229       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2230       count--;
2231     }
2232     /* wait on sends of mat->i,j */
2233     /*---------------------------*/
2234     if (nsends) {
2235       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2236     }
2237   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2238 
2239   /* post receives, send and receive mat->a */
2240   /*----------------------------------------*/
2241   for (imdex=0; imdex<nrecvs; imdex++) {
2242     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2243   }
2244   for (i=0; i<nsends; i++){
2245     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2246   }
2247   count = nrecvs;
2248   while (count) {
2249     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2250     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2251     count--;
2252   }
2253   if (nsends) {
2254     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2255   }
2256 
2257   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2258 
2259   /* create redundant matrix */
2260   /*-------------------------*/
2261   if (reuse == MAT_INITIAL_MATRIX){
2262     /* compute rownz_max for preallocation */
2263     for (imdex=0; imdex<nrecvs; imdex++){
2264       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2265       rptr = rbuf_j[imdex];
2266       for (i=0; i<j; i++){
2267         ncols = rptr[i+1] - rptr[i];
2268         if (rownz_max < ncols) rownz_max = ncols;
2269       }
2270     }
2271 
2272     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2273     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2274     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2275     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2276     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2277   } else {
2278     C = *matredundant;
2279   }
2280 
2281   /* insert local matrix entries */
2282   rptr = sbuf_j;
2283   cols = sbuf_j + rend-rstart + 1;
2284   vals = sbuf_a;
2285   for (i=0; i<rend-rstart; i++){
2286     row   = i + rstart;
2287     ncols = rptr[i+1] - rptr[i];
2288     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2289     vals += ncols;
2290     cols += ncols;
2291   }
2292   /* insert received matrix entries */
2293   for (imdex=0; imdex<nrecvs; imdex++){
2294     rstart = rowrange[recv_rank[imdex]];
2295     rend   = rowrange[recv_rank[imdex]+1];
2296     rptr = rbuf_j[imdex];
2297     cols = rbuf_j[imdex] + rend-rstart + 1;
2298     vals = rbuf_a[imdex];
2299     for (i=0; i<rend-rstart; i++){
2300       row   = i + rstart;
2301       ncols = rptr[i+1] - rptr[i];
2302       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2303       vals += ncols;
2304       cols += ncols;
2305     }
2306   }
2307   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2308   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2309   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2310   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2311   if (reuse == MAT_INITIAL_MATRIX){
2312     PetscContainer container;
2313     *matredundant = C;
2314     /* create a supporting struct and attach it to C for reuse */
2315     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2316     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2317     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2318     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2319     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2320 
2321     redund->nzlocal = nzlocal;
2322     redund->nsends  = nsends;
2323     redund->nrecvs  = nrecvs;
2324     redund->send_rank = send_rank;
2325     redund->sbuf_nz = sbuf_nz;
2326     redund->sbuf_j  = sbuf_j;
2327     redund->sbuf_a  = sbuf_a;
2328     redund->rbuf_j  = rbuf_j;
2329     redund->rbuf_a  = rbuf_a;
2330 
2331     redund->MatDestroy = C->ops->destroy;
2332     C->ops->destroy    = MatDestroy_MatRedundant;
2333   }
2334   PetscFunctionReturn(0);
2335 }
2336 
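/*
    Illustrative only: a sketch of splitting the communicator into nsubcomm
    interlaced groups (color = rank%nsubcomm, matching the subrank arithmetic
    above) and requesting one full copy of the matrix per group.  The local
    size computation is an assumption for the sketch; PCREDUNDANT normally
    performs these steps.
*/
#if 0
static PetscErrorCode RedundantSketch(Mat A,PetscInt nsubcomm,Mat *Ared)
{
  PetscErrorCode ierr;
  MPI_Comm       comm = ((PetscObject)A)->comm,subcomm;
  PetscMPIInt    rank;
  PetscInt       mlocal,M,N;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_split(comm,rank%nsubcomm,rank,&subcomm);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  mlocal = PETSC_DECIDE;
  ierr = PetscSplitOwnership(subcomm,&mlocal,&M);CHKERRQ(ierr);  /* slice of the replicated matrix */
  ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_INITIAL_MATRIX,Ared);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
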
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2339 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2340 {
2341   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2342   PetscErrorCode ierr;
2343   PetscInt       i,*idxb = 0;
2344   PetscScalar    *va,*vb;
2345   Vec            vtmp;
2346 
2347   PetscFunctionBegin;
2348   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2349   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2350   if (idx) {
2351     for (i=0; i<A->rmap->n; i++) {
2352       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2353     }
2354   }
2355 
2356   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2357   if (idx) {
2358     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2359   }
2360   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2361   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2362 
2363   for (i=0; i<A->rmap->n; i++){
2364     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2365       va[i] = vb[i];
2366       if (idx) idx[i] = a->garray[idxb[i]];
2367     }
2368   }
2369 
2370   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2371   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2372   if (idxb) {
2373     ierr = PetscFree(idxb);CHKERRQ(ierr);
2374   }
2375   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2381 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2382 {
2383   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2384   PetscErrorCode ierr;
2385   PetscInt       i,*idxb = 0;
2386   PetscScalar    *va,*vb;
2387   Vec            vtmp;
2388 
2389   PetscFunctionBegin;
2390   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2391   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2392   if (idx) {
2393     for (i=0; i<A->rmap->n; i++) {
2394       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2395     }
2396   }
2397 
2398   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2399   if (idx) {
2400     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2401   }
2402   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2403   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2404 
2405   for (i=0; i<A->rmap->n; i++){
2406     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2407       va[i] = vb[i];
2408       if (idx) idx[i] = a->garray[idxb[i]];
2409     }
2410   }
2411 
2412   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2413   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2414   if (idxb) {
2415     ierr = PetscFree(idxb);CHKERRQ(ierr);
2416   }
2417   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
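/*
    Illustrative only: both routines above fill v with one entry per local row
    and optionally report the global column index at which the extremum
    occurs.  The helper name is hypothetical.
*/
#if 0
static PetscErrorCode RowMaxAbsSketch(Mat A)
{
  PetscErrorCode ierr;
  Vec            v;
  PetscInt       m,*idx;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);       /* v conforms to the rows of A */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = VecDestroy(v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
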
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2423 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2424 {
2425   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2426   PetscInt       n      = A->rmap->n;
2427   PetscInt       cstart = A->cmap->rstart;
2428   PetscInt      *cmap   = mat->garray;
2429   PetscInt      *diagIdx, *offdiagIdx;
2430   Vec            diagV, offdiagV;
2431   PetscScalar   *a, *diagA, *offdiagA;
2432   PetscInt       r;
2433   PetscErrorCode ierr;
2434 
2435   PetscFunctionBegin;
2436   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2437   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2438   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2439   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2440   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2441   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2442   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2443   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2444   for(r = 0; r < n; ++r) {
2445     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2446       a[r]   = diagA[r];
2447       idx[r] = cstart + diagIdx[r];
2448     } else {
2449       a[r]   = offdiagA[r];
2450       idx[r] = cmap[offdiagIdx[r]];
2451     }
2452   }
2453   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2454   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2455   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2456   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2457   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2458   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 #undef __FUNCT__
2463 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2464 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2465 {
2466   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2467   PetscInt       n      = A->rmap->n;
2468   PetscInt       cstart = A->cmap->rstart;
2469   PetscInt      *cmap   = mat->garray;
2470   PetscInt      *diagIdx, *offdiagIdx;
2471   Vec            diagV, offdiagV;
2472   PetscScalar   *a, *diagA, *offdiagA;
2473   PetscInt       r;
2474   PetscErrorCode ierr;
2475 
2476   PetscFunctionBegin;
2477   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2478   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2479   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2480   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2481   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2482   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2483   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2484   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2485   for(r = 0; r < n; ++r) {
2486     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2487       a[r]   = diagA[r];
2488       idx[r] = cstart + diagIdx[r];
2489     } else {
2490       a[r]   = offdiagA[r];
2491       idx[r] = cmap[offdiagIdx[r]];
2492     }
2493   }
2494   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2495   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2496   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2497   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2498   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2499   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 #undef __FUNCT__
2504 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2505 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2506 {
2507   PetscErrorCode ierr;
2508 
2509   PetscFunctionBegin;
2510   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 /* -------------------------------------------------------------------*/
2515 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2516        MatGetRow_MPIAIJ,
2517        MatRestoreRow_MPIAIJ,
2518        MatMult_MPIAIJ,
2519 /* 4*/ MatMultAdd_MPIAIJ,
2520        MatMultTranspose_MPIAIJ,
2521        MatMultTransposeAdd_MPIAIJ,
2522 #ifdef PETSC_HAVE_PBGL
2523        MatSolve_MPIAIJ,
2524 #else
2525        0,
2526 #endif
2527        0,
2528        0,
2529 /*10*/ 0,
2530        0,
2531        0,
2532        MatRelax_MPIAIJ,
2533        MatTranspose_MPIAIJ,
2534 /*15*/ MatGetInfo_MPIAIJ,
2535        MatEqual_MPIAIJ,
2536        MatGetDiagonal_MPIAIJ,
2537        MatDiagonalScale_MPIAIJ,
2538        MatNorm_MPIAIJ,
2539 /*20*/ MatAssemblyBegin_MPIAIJ,
2540        MatAssemblyEnd_MPIAIJ,
2541        0,
2542        MatSetOption_MPIAIJ,
2543        MatZeroEntries_MPIAIJ,
2544 /*25*/ MatZeroRows_MPIAIJ,
2545        0,
2546 #ifdef PETSC_HAVE_PBGL
2547        0,
2548 #else
2549        0,
2550 #endif
2551        0,
2552        0,
2553 /*30*/ MatSetUpPreallocation_MPIAIJ,
2554 #ifdef PETSC_HAVE_PBGL
2555        0,
2556 #else
2557        0,
2558 #endif
2559        0,
2560        0,
2561        0,
2562 /*35*/ MatDuplicate_MPIAIJ,
2563        0,
2564        0,
2565        0,
2566        0,
2567 /*40*/ MatAXPY_MPIAIJ,
2568        MatGetSubMatrices_MPIAIJ,
2569        MatIncreaseOverlap_MPIAIJ,
2570        MatGetValues_MPIAIJ,
2571        MatCopy_MPIAIJ,
2572 /*45*/ MatGetRowMax_MPIAIJ,
2573        MatScale_MPIAIJ,
2574        0,
2575        0,
2576        0,
2577 /*50*/ MatSetBlockSize_MPIAIJ,
2578        0,
2579        0,
2580        0,
2581        0,
2582 /*55*/ MatFDColoringCreate_MPIAIJ,
2583        0,
2584        MatSetUnfactored_MPIAIJ,
2585        MatPermute_MPIAIJ,
2586        0,
2587 /*60*/ MatGetSubMatrix_MPIAIJ,
2588        MatDestroy_MPIAIJ,
2589        MatView_MPIAIJ,
2590        0,
2591        0,
2592 /*65*/ 0,
2593        0,
2594        0,
2595        0,
2596        0,
2597 /*70*/ MatGetRowMaxAbs_MPIAIJ,
2598        MatGetRowMinAbs_MPIAIJ,
2599        0,
2600        MatSetColoring_MPIAIJ,
2601 #if defined(PETSC_HAVE_ADIC)
2602        MatSetValuesAdic_MPIAIJ,
2603 #else
2604        0,
2605 #endif
2606        MatSetValuesAdifor_MPIAIJ,
2607 /*75*/ 0,
2608        0,
2609        0,
2610        0,
2611        0,
2612 /*80*/ 0,
2613        0,
2614        0,
2615 /*84*/ MatLoad_MPIAIJ,
2616        0,
2617        0,
2618        0,
2619        0,
2620        0,
2621 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2622        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2623        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2624        MatPtAP_Basic,
2625        MatPtAPSymbolic_MPIAIJ,
2626 /*95*/ MatPtAPNumeric_MPIAIJ,
2627        0,
2628        0,
2629        0,
2630        0,
2631 /*100*/0,
2632        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2633        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2634        MatConjugate_MPIAIJ,
2635        0,
2636 /*105*/MatSetValuesRow_MPIAIJ,
2637        MatRealPart_MPIAIJ,
2638        MatImaginaryPart_MPIAIJ,
2639        0,
2640        0,
2641 /*110*/0,
2642        MatGetRedundantMatrix_MPIAIJ,
2643        MatGetRowMin_MPIAIJ,
2644        0,
2645        0,
2646 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2647 
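/*
    Illustrative only: the table above is the MPIAIJ virtual-function table,
    indexed by the MatOperation enumeration (the numeric comments).  The
    constructor copies it into mat->ops, and generic front ends such as
    MatMult() dispatch through it; a zero slot means "not implemented", which
    callers can probe without triggering an error:
*/
#if 0
static PetscErrorCode HasOpSketch(Mat A)
{
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = MatHasOperation(A,MATOP_GET_ROW_MAX,&flg);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
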
2648 /* ----------------------------------------------------------------------------------------*/
2649 
2650 EXTERN_C_BEGIN
2651 #undef __FUNCT__
2652 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2653 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2654 {
2655   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2656   PetscErrorCode ierr;
2657 
2658   PetscFunctionBegin;
2659   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2660   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2661   PetscFunctionReturn(0);
2662 }
2663 EXTERN_C_END
2664 
2665 EXTERN_C_BEGIN
2666 #undef __FUNCT__
2667 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2668 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2669 {
2670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2671   PetscErrorCode ierr;
2672 
2673   PetscFunctionBegin;
2674   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2675   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2676   PetscFunctionReturn(0);
2677 }
2678 EXTERN_C_END
2679 
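/*
    Illustrative only: the standard store/retrieve cycle for recomputing
    values on a fixed nonzero pattern.  MatStoreValues() requires that new
    nonzero locations be disallowed first.  The helper name is hypothetical.
*/
#if 0
static PetscErrorCode StoreRetrieveSketch(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(A);CHKERRQ(ierr);     /* snapshot the numerical values */
  /* ... overwrite values, e.g. impose boundary conditions, then solve ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr);  /* restore the snapshot */
  PetscFunctionReturn(0);
}
#endif
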
2680 #include "petscpc.h"
2681 EXTERN_C_BEGIN
2682 #undef __FUNCT__
2683 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2684 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2685 {
2686   Mat_MPIAIJ     *b;
2687   PetscErrorCode ierr;
2688   PetscInt       i;
2689 
2690   PetscFunctionBegin;
2691   B->preallocated = PETSC_TRUE;
2692   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2693   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2694   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2695   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2696 
2697   B->rmap->bs = B->cmap->bs = 1;
2698   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2699   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2700   if (d_nnz) {
2701     for (i=0; i<B->rmap->n; i++) {
2702       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2703     }
2704   }
2705   if (o_nnz) {
2706     for (i=0; i<B->rmap->n; i++) {
2707       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2708     }
2709   }
2710   b = (Mat_MPIAIJ*)B->data;
2711 
2712   /* Explicitly create 2 MATSEQAIJ matrices. */
2713   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2714   ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2715   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2716   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2717   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2718   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2719   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2720   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2721 
2722   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2723   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2724 
2725   PetscFunctionReturn(0);
2726 }
2727 EXTERN_C_END
2728 
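/*
    Illustrative only: typical preallocation of an MPIAIJ matrix, separating
    nonzeros that land in the local diagonal block (d_nz/d_nnz) from those
    that couple to off-process columns (o_nz/o_nnz).  Exact counts mean no
    mallocs during assembly.  The helper name is hypothetical.
*/
#if 0
static PetscErrorCode PreallocSketch(MPI_Comm comm,PetscInt m,PetscInt M,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,m,M,M);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* at most 5 nonzeros per row in the diagonal block and 2 off process */
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
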
2729 #undef __FUNCT__
2730 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2731 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2732 {
2733   Mat            mat;
2734   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2735   PetscErrorCode ierr;
2736 
2737   PetscFunctionBegin;
2738   *newmat       = 0;
2739   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2740   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2741   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2742   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2743   a    = (Mat_MPIAIJ*)mat->data;
2744 
2745   mat->factor       = matin->factor;
2746   mat->rmap->bs      = matin->rmap->bs;
2747   mat->assembled    = PETSC_TRUE;
2748   mat->insertmode   = NOT_SET_VALUES;
2749   mat->preallocated = PETSC_TRUE;
2750 
2751   a->size           = oldmat->size;
2752   a->rank           = oldmat->rank;
2753   a->donotstash     = oldmat->donotstash;
2754   a->roworiented    = oldmat->roworiented;
2755   a->rowindices     = 0;
2756   a->rowvalues      = 0;
2757   a->getrowactive   = PETSC_FALSE;
2758 
2759   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2760   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2761 
2762   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2763   if (oldmat->colmap) {
2764 #if defined (PETSC_USE_CTABLE)
2765     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2766 #else
2767     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2768     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2769     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2770 #endif
2771   } else a->colmap = 0;
2772   if (oldmat->garray) {
2773     PetscInt len;
2774     len  = oldmat->B->cmap->n;
2775     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2776     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2777     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2778   } else a->garray = 0;
2779 
2780   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2781   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2782   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2783   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2784   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2785   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2786   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2787   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2788   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2789   *newmat = mat;
2790   PetscFunctionReturn(0);
2791 }
2792 
2793 #include "petscsys.h"
2794 
2795 #undef __FUNCT__
2796 #define __FUNCT__ "MatLoad_MPIAIJ"
2797 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2798 {
2799   Mat            A;
2800   PetscScalar    *vals,*svals;
2801   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2802   MPI_Status     status;
2803   PetscErrorCode ierr;
2804   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2805   PetscInt       i,nz,j,rstart,rend,mmax;
2806   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2807   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2808   PetscInt       cend,cstart,n,*rowners;
2809   int            fd;
2810 
2811   PetscFunctionBegin;
2812   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2813   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2814   if (!rank) {
2815     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2816     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2817     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2818   }
2819 
2820   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2821   M = header[1]; N = header[2];
2822   /* determine ownership of all rows */
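  /* (each process gets M/size rows, and the first M%size processes get one
      extra; e.g. M = 10 on 4 processes yields local sizes 3,3,2,2) */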
2823   m    = M/size + ((M % size) > rank);
2824   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2825   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2826 
2827   /* First process needs enough room for process with most rows */
2828   if (!rank) {
2829     mmax       = rowners[1];
2830     for (i=2; i<=size; i++) {
2831       mmax = PetscMax(mmax,rowners[i]);
2832     }
2833   } else mmax = m;
2834 
2835   rowners[0] = 0;
2836   for (i=2; i<=size; i++) {
2837     rowners[i] += rowners[i-1];
2838   }
2839   rstart = rowners[rank];
2840   rend   = rowners[rank+1];
2841 
2842   /* distribute row lengths to all processors */
2843   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2844   if (!rank) {
2845     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2846     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2847     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2848     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2849     for (j=0; j<m; j++) {
2850       procsnz[0] += ourlens[j];
2851     }
2852     for (i=1; i<size; i++) {
2853       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2854       /* calculate the number of nonzeros on each processor */
2855       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2856         procsnz[i] += rowlengths[j];
2857       }
2858       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2859     }
2860     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2861   } else {
2862     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2863   }
2864 
2865   if (!rank) {
2866     /* determine max buffer needed and allocate it */
2867     maxnz = 0;
2868     for (i=0; i<size; i++) {
2869       maxnz = PetscMax(maxnz,procsnz[i]);
2870     }
2871     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2872 
2873     /* read in my part of the matrix column indices  */
2874     nz   = procsnz[0];
2875     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2876     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2877 
2878     /* read in everyone else's and ship them off */
2879     for (i=1; i<size; i++) {
2880       nz   = procsnz[i];
2881       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2882       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2883     }
2884     ierr = PetscFree(cols);CHKERRQ(ierr);
2885   } else {
2886     /* determine buffer space needed for message */
2887     nz = 0;
2888     for (i=0; i<m; i++) {
2889       nz += ourlens[i];
2890     }
2891     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2892 
2893     /* receive message of column indices */
2894     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2895     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2896     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2897   }
2898 
2899   /* determine column ownership if matrix is not square */
2900   if (N != M) {
2901     n      = N/size + ((N % size) > rank);
2902     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2903     cstart = cend - n;
2904   } else {
2905     cstart = rstart;
2906     cend   = rend;
2907     n      = cend - cstart;
2908   }
2909 
2910   /* loop over local rows, determining number of off diagonal entries */
2911   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2912   jj = 0;
2913   for (i=0; i<m; i++) {
2914     for (j=0; j<ourlens[i]; j++) {
2915       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2916       jj++;
2917     }
2918   }
2919 
2920   /* create our matrix */
2921   for (i=0; i<m; i++) {
2922     ourlens[i] -= offlens[i];
2923   }
2924   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2925   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2926   ierr = MatSetType(A,type);CHKERRQ(ierr);
2927   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2928 
2929   for (i=0; i<m; i++) {
2930     ourlens[i] += offlens[i];
2931   }
2932 
2933   if (!rank) {
2934     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2935 
2936     /* read in my part of the matrix numerical values  */
2937     nz   = procsnz[0];
2938     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2939 
2940     /* insert into matrix */
2941     jj      = rstart;
2942     smycols = mycols;
2943     svals   = vals;
2944     for (i=0; i<m; i++) {
2945       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2946       smycols += ourlens[i];
2947       svals   += ourlens[i];
2948       jj++;
2949     }
2950 
2951     /* read in other processors and ship out */
2952     for (i=1; i<size; i++) {
2953       nz   = procsnz[i];
2954       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2955       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2956     }
2957     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2958   } else {
2959     /* receive numeric values */
2960     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2961 
2962     /* receive message of values */
2963     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2964     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2965     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2966 
2967     /* insert into matrix */
2968     jj      = rstart;
2969     smycols = mycols;
2970     svals   = vals;
2971     for (i=0; i<m; i++) {
2972       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2973       smycols += ourlens[i];
2974       svals   += ourlens[i];
2975       jj++;
2976     }
2977   }
2978   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2979   ierr = PetscFree(vals);CHKERRQ(ierr);
2980   ierr = PetscFree(mycols);CHKERRQ(ierr);
2981   ierr = PetscFree(rowners);CHKERRQ(ierr);
2982 
2983   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2984   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2985   *newmat = A;
2986   PetscFunctionReturn(0);
2987 }
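
/*
   A minimal usage sketch for the loader above. The file name "matrix.dat" is
   illustrative; the file is assumed to hold a matrix written with MatView()
   on a binary viewer:

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/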
2988 
2989 #undef __FUNCT__
2990 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2991 /*
2992     Not great since it makes two copies of the submatrix: first a SeqAIJ
2993   on each process, and then the end result assembled by concatenating the local matrices.
2994   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2995 */
2996 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2997 {
2998   PetscErrorCode ierr;
2999   PetscMPIInt    rank,size;
3000   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3001   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3002   Mat            *local,M,Mreuse;
3003   MatScalar      *vwork,*aa;
3004   MPI_Comm       comm = ((PetscObject)mat)->comm;
3005   Mat_SeqAIJ     *aij;
3006 
3007 
3008   PetscFunctionBegin;
3009   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3010   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3011 
3012   if (call ==  MAT_REUSE_MATRIX) {
3013     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3014     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3015     local = &Mreuse;
3016     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3017   } else {
3018     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3019     Mreuse = *local;
3020     ierr   = PetscFree(local);CHKERRQ(ierr);
3021   }
3022 
3023   /*
3024       m - number of local rows
3025       n - number of columns (same on all processors)
3026       rstart - first row in new global matrix generated
3027   */
3028   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3029   if (call == MAT_INITIAL_MATRIX) {
3030     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3031     ii  = aij->i;
3032     jj  = aij->j;
3033 
3034     /*
3035         Determine the number of non-zeros in the diagonal and off-diagonal
3036         portions of the matrix in order to do correct preallocation
3037     */
3038 
3039     /* first get start and end of "diagonal" columns */
3040     if (csize == PETSC_DECIDE) {
3041       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3042       if (mglobal == n) { /* square matrix */
3043 	nlocal = m;
3044       } else {
3045         nlocal = n/size + ((n % size) > rank);
3046       }
3047     } else {
3048       nlocal = csize;
3049     }
3050     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3051     rstart = rend - nlocal;
3052     if (rank == size - 1 && rend != n) {
3053       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3054     }
3055 
3056     /* next, compute all the lengths */
3057     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3058     olens = dlens + m;
3059     for (i=0; i<m; i++) {
3060       jend = ii[i+1] - ii[i];
3061       olen = 0;
3062       dlen = 0;
3063       for (j=0; j<jend; j++) {
3064         if (*jj < rstart || *jj >= rend) olen++;
3065         else dlen++;
3066         jj++;
3067       }
3068       olens[i] = olen;
3069       dlens[i] = dlen;
3070     }
3071     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3072     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3073     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3074     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3075     ierr = PetscFree(dlens);CHKERRQ(ierr);
3076   } else {
3077     PetscInt ml,nl;
3078 
3079     M = *newmat;
3080     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3081     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3082     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3083     /*
3084          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3085        rather than the slower MatSetValues().
3086     */
3087     M->was_assembled = PETSC_TRUE;
3088     M->assembled     = PETSC_FALSE;
3089   }
3090   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3091   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3092   ii  = aij->i;
3093   jj  = aij->j;
3094   aa  = aij->a;
3095   for (i=0; i<m; i++) {
3096     row   = rstart + i;
3097     nz    = ii[i+1] - ii[i];
3098     cwork = jj;     jj += nz;
3099     vwork = aa;     aa += nz;
3100     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3101   }
3102 
3103   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3104   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3105   *newmat = M;
3106 
3107   /* save submatrix used in processor for next request */
3108   if (call ==  MAT_INITIAL_MATRIX) {
3109     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3110     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3111   }
3112 
3113   PetscFunctionReturn(0);
3114 }
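
/*
   A usage sketch for the routine above, normally reached through the generic
   MatGetSubMatrix(); nloc and rstart are illustrative local sizes:

     IS  isrow,iscol;
     Mat sub;

     ierr = ISCreateStride(comm,nloc,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(comm,nloc,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/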
3115 
3116 EXTERN_C_BEGIN
3117 #undef __FUNCT__
3118 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3119 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3120 {
3121   PetscInt       m,cstart, cend,j,nnz,i,d;
3122   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3123   const PetscInt *JJ;
3124   PetscScalar    *values;
3125   PetscErrorCode ierr;
3126 
3127   PetscFunctionBegin;
3128   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3129 
3130   B->rmap->bs = B->cmap->bs = 1;
3131   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3132   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3133   m      = B->rmap->n;
3134   cstart = B->cmap->rstart;
3135   cend   = B->cmap->rend;
3136   rstart = B->rmap->rstart;
3137 
3138   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3139   o_nnz = d_nnz + m;
3140 
3141 #if defined(PETSC_USE_DEBUG)
3142   for (i=0; i<m; i++) {
3143     nnz     = Ii[i+1]- Ii[i];
3144     JJ      = J + Ii[i];
3145     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3146     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3147     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3148     for (j=1; j<nnz; j++) {
3149       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at location %D in column indices",i,j);
3150     }
3151   }
3152 #endif
3153 
3154   for (i=0; i<m; i++) {
3155     nnz     = Ii[i+1]- Ii[i];
3156     JJ      = J + Ii[i];
3157     nnz_max = PetscMax(nnz_max,nnz);
3158     for (j=0; j<nnz; j++) {
3159       if (*JJ >= cstart) break;
3160       JJ++;
3161     }
3162     d = 0;
3163     for (; j<nnz; j++) {
3164       if (*JJ++ >= cend) break;
3165       d++;
3166     }
3167     d_nnz[i] = d;
3168     o_nnz[i] = nnz - d;
3169   }
3170   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3171   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3172 
3173   if (v) values = (PetscScalar*)v;
3174   else {
3175     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3176     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3177   }
3178 
3179   for (i=0; i<m; i++) {
3180     ii   = i + rstart;
3181     nnz  = Ii[i+1]- Ii[i];
3182     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3183   }
3184   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3185   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3186 
3187   if (!v) {
3188     ierr = PetscFree(values);CHKERRQ(ierr);
3189   }
3190   PetscFunctionReturn(0);
3191 }
3192 EXTERN_C_END
3193 
3194 #undef __FUNCT__
3195 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3196 /*@
3197    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3198    (the default parallel PETSc format).
3199 
3200    Collective on MPI_Comm
3201 
3202    Input Parameters:
3203 +  B - the matrix
3204 .  i - the indices into j for the start of each local row (starts with zero)
3205 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3206 -  v - optional values in the matrix
3207 
3208    Level: developer
3209 
3210    Notes:
3211        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3212      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3213      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3214 
3215        The i and j indices are zero-based, and the i entries are offsets into the local j array.
3216 
3217        The format used for the sparse matrix input is equivalent to a
3218     row-major ordering, i.e., for the following matrix, the input data expected is
3219     as shown:
3220 
3221         1 0 0
3222         2 0 3     P0
3223        -------
3224         4 5 6     P1
3225 
3226      Process0 [P0]: rows_owned=[0,1]
3227         i =  {0,1,3}  [size = nrow+1  = 2+1]
3228         j =  {0,0,2}  [size = nz = 3]
3229         v =  {1,2,3}  [size = nz = 3]
3230 
3231      Process1 [P1]: rows_owned=[2]
3232         i =  {0,3}    [size = nrow+1  = 1+1]
3233         j =  {0,1,2}  [size = nz = 3]
3234         v =  {4,5,6}  [size = nz = 3]
3235 
3236       The column indices for each row MUST be sorted.
3237 
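   Example usage, a sketch for process P0 in the layout above (B must already
   have its type and sizes set; the variable names are illustrative):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve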
3238 .keywords: matrix, aij, compressed row, sparse, parallel
3239 
3240 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3241           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3242 @*/
3243 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3244 {
3245   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3246 
3247   PetscFunctionBegin;
3248   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3249   if (f) {
3250     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3251   }
3252   PetscFunctionReturn(0);
3253 }
3254 
3255 #undef __FUNCT__
3256 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3257 /*@C
3258    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3259    (the default parallel PETSc format).  For good matrix assembly performance
3260    the user should preallocate the matrix storage by setting the parameters
3261    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3262    performance can be increased by more than a factor of 50.
3263 
3264    Collective on MPI_Comm
3265 
3266    Input Parameters:
3267 +  A - the matrix
3268 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3269            (same value is used for all local rows)
3270 .  d_nnz - array containing the number of nonzeros in the various rows of the
3271            DIAGONAL portion of the local submatrix (possibly different for each row)
3272            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3273            The size of this array is equal to the number of local rows, i.e 'm'.
3274            You must leave room for the diagonal entry even if it is zero.
3275 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3276            submatrix (same value is used for all local rows).
3277 -  o_nnz - array containing the number of nonzeros in the various rows of the
3278            OFF-DIAGONAL portion of the local submatrix (possibly different for
3279            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3280            structure. The size of this array is equal to the number
3281            of local rows, i.e 'm'.
3282 
3283    If the *_nnz parameter is given then the *_nz parameter is ignored
3284 
3285    The AIJ format (also called the Yale sparse matrix format or
3286    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3287    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3288 
3289    The parallel matrix is partitioned such that the first m0 rows belong to
3290    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3291    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3292 
3293    The DIAGONAL portion of the local submatrix of a processor can be defined
3294    as the submatrix which is obtained by extracting the part corresponding
3295    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3296    first row that belongs to the processor, and r2 is the last row belonging
3297    to this processor. This is a square mxm matrix. The remaining portion
3298    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3299 
3300    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3301 
3302    You can call MatGetInfo() to get information on how effective the preallocation was;
3303    for example, see the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3304    You can also run with the option -info and look for messages containing the string
3305    malloc to see if additional memory allocation was needed.
3306 
3307    Example usage:
3308 
3309    Consider the following 8x8 matrix with 34 non-zero values, that is
3310    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3311    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3312    as follows:
3313 
3314 .vb
3315             1  2  0  |  0  3  0  |  0  4
3316     Proc0   0  5  6  |  7  0  0  |  8  0
3317             9  0 10  | 11  0  0  | 12  0
3318     -------------------------------------
3319            13  0 14  | 15 16 17  |  0  0
3320     Proc1   0 18  0  | 19 20 21  |  0  0
3321             0  0  0  | 22 23  0  | 24  0
3322     -------------------------------------
3323     Proc2  25 26 27  |  0  0 28  | 29  0
3324            30  0  0  | 31 32 33  |  0 34
3325 .ve
3326 
3327    This can be represented as a collection of submatrices as:
3328 
3329 .vb
3330       A B C
3331       D E F
3332       G H I
3333 .ve
3334 
3335    Where the submatrices A,B,C are owned by proc0, D,E,F are
3336    owned by proc1, G,H,I are owned by proc2.
3337 
3338    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3339    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3340    The 'M','N' parameters are 8,8, and have the same values on all procs.
3341 
3342    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3343    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3344    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3345    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3346    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3347    matrix, and [DF] as another SeqAIJ matrix.
3348 
3349    When d_nz, o_nz parameters are specified, d_nz storage elements are
3350    allocated for every row of the local diagonal submatrix, and o_nz
3351    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3352    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3353    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3354    In this case, the values of d_nz,o_nz are:
3355 .vb
3356      proc0 : d_nz = 2, o_nz = 2
3357      proc1 : d_nz = 3, o_nz = 2
3358      proc2 : d_nz = 1, o_nz = 4
3359 .ve
3360    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3361    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3362    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3363    34 values.
3364 
3365    When d_nnz, o_nnz parameters are specified, the storage is specified
3366    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3367    In the above case the values for d_nnz,o_nnz are:
3368 .vb
3369      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3370      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3371      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3372 .ve
3373    Here the space allocated is the sum of all of the above values, i.e., 34, and
3374    hence preallocation is perfect.
3375 
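   For proc1 in the example above, the corresponding call is then (a sketch;
   the declarations are illustrative):

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve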
3376    Level: intermediate
3377 
3378 .keywords: matrix, aij, compressed row, sparse, parallel
3379 
3380 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3381           MPIAIJ, MatGetInfo()
3382 @*/
3383 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3384 {
3385   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3386 
3387   PetscFunctionBegin;
3388   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3389   if (f) {
3390     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3391   }
3392   PetscFunctionReturn(0);
3393 }
3394 
3395 #undef __FUNCT__
3396 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3397 /*@
3398      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3399          rows in standard CSR format.
3400 
3401    Collective on MPI_Comm
3402 
3403    Input Parameters:
3404 +  comm - MPI communicator
3405 .  m - number of local rows (Cannot be PETSC_DECIDE)
3406 .  n - This value should be the same as the local size used in creating the
3407        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
3408        it calculated if N is given). For square matrices n is almost always m.
3409 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3410 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3411 .   i - row indices
3412 .   j - column indices
3413 -   a - matrix values
3414 
3415    Output Parameter:
3416 .   mat - the matrix
3417 
3418    Level: intermediate
3419 
3420    Notes:
3421        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3422      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3423      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3424 
3425        The i and j indices are zero-based, and the i entries are offsets into the local j array.
3426 
3427        The format used for the sparse matrix input is equivalent to a
3428     row-major ordering, i.e., for the following matrix, the input data expected is
3429     as shown:
3430 
3431         1 0 0
3432         2 0 3     P0
3433        -------
3434         4 5 6     P1
3435 
3436      Process0 [P0]: rows_owned=[0,1]
3437         i =  {0,1,3}  [size = nrow+1  = 2+1]
3438         j =  {0,0,2}  [size = nz = 3]
3439         v =  {1,2,3}  [size = nz = 3]
3440 
3441      Process1 [P1]: rows_owned=[2]
3442         i =  {0,3}    [size = nrow+1  = 1+1]
3443         j =  {0,1,2}  [size = nz = 3]
3444         v =  {4,5,6}  [size = nz = 3]
3445 
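   For the layout above, process P1 would then call (a sketch; comm and the
   variable names are illustrative):

.vb
     PetscInt    i[] = {0,3};
     PetscInt    j[] = {0,1,2};
     PetscScalar v[] = {4.0,5.0,6.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve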
3446 .keywords: matrix, aij, compressed row, sparse, parallel
3447 
3448 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3449           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3450 @*/
3451 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3452 {
3453   PetscErrorCode ierr;
3454 
3455   PetscFunctionBegin;
3456   if (i[0]) {
3457     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3458   }
3459   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
3460   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3461   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3462   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3463   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3464   PetscFunctionReturn(0);
3465 }
3466 
3467 #undef __FUNCT__
3468 #define __FUNCT__ "MatCreateMPIAIJ"
3469 /*@C
3470    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3471    (the default parallel PETSc format).  For good matrix assembly performance
3472    the user should preallocate the matrix storage by setting the parameters
3473    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3474    performance can be increased by more than a factor of 50.
3475 
3476    Collective on MPI_Comm
3477 
3478    Input Parameters:
3479 +  comm - MPI communicator
3480 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3481            This value should be the same as the local size used in creating the
3482            y vector for the matrix-vector product y = Ax.
3483 .  n - This value should be the same as the local size used in creating the
3484        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
3485        it calculated if N is given). For square matrices n is almost always m.
3486 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3487 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3488 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3489            (same value is used for all local rows)
3490 .  d_nnz - array containing the number of nonzeros in the various rows of the
3491            DIAGONAL portion of the local submatrix (possibly different for each row)
3492            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3493            The size of this array is equal to the number of local rows, i.e 'm'.
3494            You must leave room for the diagonal entry even if it is zero.
3495 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3496            submatrix (same value is used for all local rows).
3497 -  o_nnz - array containing the number of nonzeros in the various rows of the
3498            OFF-DIAGONAL portion of the local submatrix (possibly different for
3499            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3500            structure. The size of this array is equal to the number
3501            of local rows, i.e 'm'.
3502 
3503    Output Parameter:
3504 .  A - the matrix
3505 
3506    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3507    MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
3508    true if you plan to use external direct solvers such as SuperLU, MUMPS, or Spooles.
3509    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3510 
3511    Notes:
3512    If the *_nnz parameter is given then the *_nz parameter is ignored
3513 
3514    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3515    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3516    storage requirements for this matrix.
3517 
3518    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3519    processor then it must be used on all processors that share the object for
3520    that argument.
3521 
3522    The user MUST specify either the local or global matrix dimensions
3523    (possibly both).
3524 
3525    The parallel matrix is partitioned across processors such that the
3526    first m0 rows belong to process 0, the next m1 rows belong to
3527    process 1, the next m2 rows belong to process 2, etc., where
3528    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
3529    values corresponding to an [m x N] submatrix.
3530 
3531    The columns are logically partitioned with the n0 columns belonging
3532    to the 0th partition, the next n1 columns belonging to the next
3533    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
3534 
3535    The DIAGONAL portion of the local submatrix on any given processor
3536    is the submatrix corresponding to the rows and columns m,n
3537    owned by the given processor, i.e., the diagonal matrix on
3538    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3539    etc. The remaining portion of the local submatrix [m x (N-n)]
3540    constitute the OFF-DIAGONAL portion. The example below better
3541    illustrates this concept.
3542 
3543    For a square global matrix we define each processor's diagonal portion
3544    to be its local rows and the corresponding columns (a square submatrix);
3545    each processor's off-diagonal portion encompasses the remainder of the
3546    local matrix (a rectangular submatrix).
3547 
3548    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3549 
3550    When calling this routine with a single process communicator, a matrix of
3551    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3552    type of communicator, use the construction mechanism:
3553      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3554 
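   Spelled out, that recommended construction sequence is (a sketch; the size
   and preallocation arguments are illustrative):

.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve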
3555    By default, this format uses inodes (identical nodes) when possible.
3556    We search for consecutive rows with the same nonzero structure, thereby
3557    reusing matrix information to achieve increased efficiency.
3558 
3559    Options Database Keys:
3560 +  -mat_no_inode  - Do not use inodes
3561 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3562 -  -mat_aij_oneindex - Internally use indexing starting at 1
3563         rather than 0.  Note that when calling MatSetValues(),
3564         the user still MUST index entries starting at 0!
3565 
3566 
3567    Example usage:
3568 
3569    Consider the following 8x8 matrix with 34 non-zero values, that is
3570    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3571    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3572    as follows:
3573 
3574 .vb
3575             1  2  0  |  0  3  0  |  0  4
3576     Proc0   0  5  6  |  7  0  0  |  8  0
3577             9  0 10  | 11  0  0  | 12  0
3578     -------------------------------------
3579            13  0 14  | 15 16 17  |  0  0
3580     Proc1   0 18  0  | 19 20 21  |  0  0
3581             0  0  0  | 22 23  0  | 24  0
3582     -------------------------------------
3583     Proc2  25 26 27  |  0  0 28  | 29  0
3584            30  0  0  | 31 32 33  |  0 34
3585 .ve
3586 
3587    This can be represented as a collection of submatrices as:
3588 
3589 .vb
3590       A B C
3591       D E F
3592       G H I
3593 .ve
3594 
3595    Where the submatrices A,B,C are owned by proc0, D,E,F are
3596    owned by proc1, G,H,I are owned by proc2.
3597 
3598    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3599    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3600    The 'M','N' parameters are 8,8, and have the same values on all procs.
3601 
3602    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3603    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3604    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3605    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3606    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3607    matrix, and [DF] as another SeqAIJ matrix.
3608 
3609    When d_nz, o_nz parameters are specified, d_nz storage elements are
3610    allocated for every row of the local diagonal submatrix, and o_nz
3611    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3612    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3613    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3614    In this case, the values of d_nz,o_nz are:
3615 .vb
3616      proc0 : d_nz = 2, o_nz = 2
3617      proc1 : d_nz = 3, o_nz = 2
3618      proc2 : d_nz = 1, o_nz = 4
3619 .ve
3620    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3621    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3622    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3623    34 values.
3624 
3625    When d_nnz, o_nnz parameters are specified, the storage is specified
3626    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3627    In the above case the values for d_nnz,o_nnz are:
3628 .vb
3629      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3630      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3631      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3632 .ve
3633    Here the space allocated is the sum of all of the above values, i.e., 34, and
3634    hence preallocation is perfect.
3635 
3636    Level: intermediate
3637 
3638 .keywords: matrix, aij, compressed row, sparse, parallel
3639 
3640 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3641           MPIAIJ, MatCreateMPIAIJWithArrays()
3642 @*/
3643 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3644 {
3645   PetscErrorCode ierr;
3646   PetscMPIInt    size;
3647 
3648   PetscFunctionBegin;
3649   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3650   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3651   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3652   if (size > 1) {
3653     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3654     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3655   } else {
3656     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3657     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3658   }
3659   PetscFunctionReturn(0);
3660 }
3661 
3662 #undef __FUNCT__
3663 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3664 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3665 {
3666   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3667 
3668   PetscFunctionBegin;
3669   *Ad     = a->A;
3670   *Ao     = a->B;
3671   *colmap = a->garray;
3672   PetscFunctionReturn(0);
3673 }
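
/*
   A usage sketch for the accessor above: Ad and Ao receive the diagonal and
   off-diagonal SeqAIJ blocks, and colmap gives the global column number of
   each local column of Ao:

     Mat      Ad,Ao;
     PetscInt *colmap;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/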
3674 
3675 #undef __FUNCT__
3676 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3677 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3678 {
3679   PetscErrorCode ierr;
3680   PetscInt       i;
3681   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3682 
3683   PetscFunctionBegin;
3684   if (coloring->ctype == IS_COLORING_GLOBAL) {
3685     ISColoringValue *allcolors,*colors;
3686     ISColoring      ocoloring;
3687 
3688     /* set coloring for diagonal portion */
3689     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3690 
3691     /* set coloring for off-diagonal portion */
3692     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3693     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3694     for (i=0; i<a->B->cmap->n; i++) {
3695       colors[i] = allcolors[a->garray[i]];
3696     }
3697     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3698     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3699     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3700     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3701   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3702     ISColoringValue *colors;
3703     PetscInt        *larray;
3704     ISColoring      ocoloring;
3705 
3706     /* set coloring for diagonal portion */
3707     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3708     for (i=0; i<a->A->cmap->n; i++) {
3709       larray[i] = i + A->cmap->rstart;
3710     }
3711     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3712     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3713     for (i=0; i<a->A->cmap->n; i++) {
3714       colors[i] = coloring->colors[larray[i]];
3715     }
3716     ierr = PetscFree(larray);CHKERRQ(ierr);
3717     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3718     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3719     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3720 
3721     /* set coloring for off-diagonal portion */
3722     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3723     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3724     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3725     for (i=0; i<a->B->cmap->n; i++) {
3726       colors[i] = coloring->colors[larray[i]];
3727     }
3728     ierr = PetscFree(larray);CHKERRQ(ierr);
3729     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3730     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3731     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3732   } else {
3733     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3734   }
3735 
3736   PetscFunctionReturn(0);
3737 }
3738 
3739 #if defined(PETSC_HAVE_ADIC)
3740 #undef __FUNCT__
3741 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3742 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3743 {
3744   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3745   PetscErrorCode ierr;
3746 
3747   PetscFunctionBegin;
3748   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3749   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3750   PetscFunctionReturn(0);
3751 }
3752 #endif
3753 
3754 #undef __FUNCT__
3755 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3756 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3757 {
3758   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3759   PetscErrorCode ierr;
3760 
3761   PetscFunctionBegin;
3762   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3763   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3764   PetscFunctionReturn(0);
3765 }
3766 
3767 #undef __FUNCT__
3768 #define __FUNCT__ "MatMerge"
3769 /*@
3770       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3771                  matrices from each processor
3772 
3773     Collective on MPI_Comm
3774 
3775    Input Parameters:
3776 +    comm - the communicator the parallel matrix will live on
3777 .    inmat - the input sequential matrices
3778 .    n - number of local columns (or PETSC_DECIDE)
3779 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3780 
3781    Output Parameter:
3782 .    outmat - the parallel matrix generated
3783 
3784     Level: advanced
3785 
3786    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3787 
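   A minimal usage sketch (seqmat is the sequential matrix owned by this
   process; note that the routine destroys the input matrix):

.vb
     Mat C;

     ierr = MatMerge(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve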
3788 @*/
3789 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3790 {
3791   PetscErrorCode ierr;
3792   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3793   PetscInt       *indx;
3794   PetscScalar    *values;
3795 
3796   PetscFunctionBegin;
3797   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3798   if (scall == MAT_INITIAL_MATRIX){
3799     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3800     if (n == PETSC_DECIDE){
3801       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3802     }
3803     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3804     rstart -= m;
3805 
3806     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3807     for (i=0;i<m;i++) {
3808       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3809       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3810       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3811     }
3812     /* This routine will ONLY return an MPIAIJ type matrix */
3813     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3814     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3815     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3816     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3817     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3818 
3819   } else if (scall == MAT_REUSE_MATRIX){
3820     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3821   } else {
3822     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3823   }
3824 
3825   for (i=0;i<m;i++) {
3826     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3827     Ii    = i + rstart;
3828     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3829     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3830   }
3831   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3832   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3833   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3834 
3835   PetscFunctionReturn(0);
3836 }
3837 
3838 #undef __FUNCT__
3839 #define __FUNCT__ "MatFileSplit"
3840 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3841 {
3842   PetscErrorCode    ierr;
3843   PetscMPIInt       rank;
3844   PetscInt          m,N,i,rstart,nnz;
3845   size_t            len;
3846   const PetscInt    *indx;
3847   PetscViewer       out;
3848   char              *name;
3849   Mat               B;
3850   const PetscScalar *values;
3851 
3852   PetscFunctionBegin;
3853   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3854   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3855   /* Should this be the type of the diagonal block of A? */
3856   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3857   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3858   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3859   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3860   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3861   for (i=0;i<m;i++) {
3862     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3863     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3864     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3865   }
3866   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3867   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3868 
3869   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3870   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3871   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3872   sprintf(name,"%s.%d",outfile,rank);
3873   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3874   ierr = PetscFree(name);CHKERRQ(ierr);
3875   ierr = MatView(B,out);CHKERRQ(ierr);
3876   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3877   ierr = MatDestroy(B);CHKERRQ(ierr);
3878   PetscFunctionReturn(0);
3879 }
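
/*
   A usage sketch for MatFileSplit() (the output base name is illustrative):
   each process writes its own rows, as a SeqAIJ matrix, to its own binary file,
   so

     ierr = MatFileSplit(A,"splitmat");CHKERRQ(ierr);

   produces the files splitmat.0, splitmat.1, ... on the respective processes.
*/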
3880 
3881 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3882 #undef __FUNCT__
3883 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3884 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3885 {
3886   PetscErrorCode       ierr;
3887   Mat_Merge_SeqsToMPI  *merge;
3888   PetscContainer       container;
3889 
3890   PetscFunctionBegin;
3891   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3892   if (container) {
3893     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3894     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3895     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3896     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3897     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3898     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3899     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3900     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3901     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3902     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3903     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3904     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3905 
3906     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3907     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3908     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container was found */
3909   }
3910 
3911   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3912   PetscFunctionReturn(0);
3913 }
3914 
3915 #include "src/mat/utils/freespace.h"
3916 #include "petscbt.h"
3917 
3918 #undef __FUNCT__
3919 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3920 /*@C
3921       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3922                  matrices from each processor
3923 
3924     Collective on MPI_Comm
3925 
3926    Input Parameters:
3927 +    comm - the communicator the parallel matrix will live on
3928 .    seqmat - the input sequential matrices
3929 .    m - number of local rows (or PETSC_DECIDE)
3930 .    n - number of local columns (or PETSC_DECIDE)
3931 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3932 
3933    Output Parameter:
3934 .    mpimat - the parallel matrix generated
3935 
3936     Level: advanced
3937 
3938    Notes:
3939      The dimensions of the sequential matrix in each processor MUST be the same.
3940      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3941      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
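
   A typical calling sequence is the symbolic phase followed by the numeric
   phase (a sketch; m and n are the requested local sizes):

.vb
     Mat C;

     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&C);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPINumeric(seqmat,C);CHKERRQ(ierr);
.ve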
3942 @*/
3943 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3944 {
3945   PetscErrorCode       ierr;
3946   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3947   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3948   PetscMPIInt          size,rank,taga,*len_s;
3949   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
3950   PetscInt             proc,m;
3951   PetscInt             **buf_ri,**buf_rj;
3952   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3953   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3954   MPI_Request          *s_waits,*r_waits;
3955   MPI_Status           *status;
3956   MatScalar            *aa=a->a;
3957   MatScalar            **abuf_r,*ba_i;
3958   Mat_Merge_SeqsToMPI  *merge;
3959   PetscContainer       container;
3960 
3961   PetscFunctionBegin;
3962   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3963 
3964   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3965   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3966 
3967   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3968   if (container) {
3969     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3970   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat must be created with MatMerge_SeqsToMPISymbolic()");
3971   bi     = merge->bi;
3972   bj     = merge->bj;
3973   buf_ri = merge->buf_ri;
3974   buf_rj = merge->buf_rj;
3975 
3976   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3977   owners = merge->rowmap.range;
3978   len_s  = merge->len_s;
3979 
3980   /* send and recv matrix values */
3981   /*-----------------------------*/
3982   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3983   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3984 
3985   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3986   for (proc=0,k=0; proc<size; proc++){
3987     if (!len_s[proc]) continue;
3988     i = owners[proc];
3989     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3990     k++;
3991   }
3992 
3993   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3994   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3995   ierr = PetscFree(status);CHKERRQ(ierr);
3996 
3997   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3998   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3999 
4000   /* insert mat values of mpimat */
4001   /*----------------------------*/
4002   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4003   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4004   nextrow = buf_ri_k + merge->nrecv;
4005   nextai  = nextrow + merge->nrecv;
4006 
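  /* Each received i-structure buf_ri[k] is laid out as
       [nrows, the nrows row numbers, then the nrows+1 cumulative row offsets],
     so the offset list begins nrows+1 entries past the start. */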
4007   for (k=0; k<merge->nrecv; k++){
4008     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4009     nrows       = *(buf_ri_k[k]);
4010     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4011     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the row-offset (ai) list of the k-th received i-structure */
4012   }
4013 
4014   /* set values of ba */
4015   m = merge->rowmap.n;
4016   for (i=0; i<m; i++) {
4017     arow = owners[rank] + i;
4018     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4019     bnzi = bi[i+1] - bi[i];
4020     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4021 
4022     /* add local non-zero vals of this proc's seqmat into ba */
4023     anzi = ai[arow+1] - ai[arow];
4024     aj   = a->j + ai[arow];
4025     aa   = a->a + ai[arow];
4026     nextaj = 0;
4027     for (j=0; nextaj<anzi; j++){
4028       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4029         ba_i[j] += aa[nextaj++];
4030       }
4031     }
4032 
4033     /* add received vals into ba */
4034     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4035       /* i-th row */
4036       if (i == *nextrow[k]) {
4037         anzi = *(nextai[k]+1) - *nextai[k];
4038         aj   = buf_rj[k] + *(nextai[k]);
4039         aa   = abuf_r[k] + *(nextai[k]);
4040         nextaj = 0;
4041         for (j=0; nextaj<anzi; j++){
4042           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4043             ba_i[j] += aa[nextaj++];
4044           }
4045         }
4046         nextrow[k]++; nextai[k]++;
4047       }
4048     }
4049     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4050   }
4051   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4052   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4053 
4054   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4055   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4056   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4057   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4058   PetscFunctionReturn(0);
4059 }
4060 
4061 #undef __FUNCT__
4062 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4063 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4064 {
4065   PetscErrorCode       ierr;
4066   Mat                  B_mpi;
4067   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4068   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4069   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4070   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4071   PetscInt             len,proc,*dnz,*onz;
4072   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4073   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4074   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4075   MPI_Status           *status;
4076   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4077   PetscBT              lnkbt;
4078   Mat_Merge_SeqsToMPI  *merge;
4079   PetscContainer       container;
4080 
4081   PetscFunctionBegin;
4082   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4083 
4084   /* make sure it is a PETSc comm */
4085   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4086   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4087   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4088 
4089   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4090   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4091 
4092   /* determine row ownership */
4093   /*---------------------------------------------------------*/
4094   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4095   merge->rowmap.n = m;
4096   merge->rowmap.N = M;
4097   merge->rowmap.bs = 1;
4098   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4099   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4100   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4101 
4102   m      = merge->rowmap.n;
4103   M      = merge->rowmap.N;
4104   owners = merge->rowmap.range;
4105 
4106   /* determine the number of messages to send, their lengths */
4107   /*---------------------------------------------------------*/
4108   len_s  = merge->len_s;
4109 
4110   len = 0;  /* length of buf_si[] */
4111   merge->nsend = 0;
4112   for (proc=0; proc<size; proc++){
4113     len_si[proc] = 0;
4114     if (proc == rank){
4115       len_s[proc] = 0;
4116     } else {
4117       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4118       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4119     }
4120     if (len_s[proc]) {
4121       merge->nsend++;
4122       nrows = 0;
4123       for (i=owners[proc]; i<owners[proc+1]; i++){
4124         if (ai[i+1] > ai[i]) nrows++;
4125       }
4126       len_si[proc] = 2*(nrows+1);
4127       len += len_si[proc];
4128     }
4129   }
4130 
4131   /* determine the number and length of messages to receive for ij-structure */
4132   /*-------------------------------------------------------------------------*/
4133   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4134   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4135 
4136   /* post the Irecv of j-structure */
4137   /*-------------------------------*/
4138   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4139   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4140 
4141   /* post the Isend of j-structure */
4142   /*--------------------------------*/
4143   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4144   sj_waits = si_waits + merge->nsend;
4145 
4146   for (proc=0, k=0; proc<size; proc++){
4147     if (!len_s[proc]) continue;
4148     i = owners[proc];
4149     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4150     k++;
4151   }
4152 
4153   /* receives and sends of j-structure are complete */
4154   /*------------------------------------------------*/
4155   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4156   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4157 
4158   /* send and recv i-structure */
4159   /*---------------------------*/
4160   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4161   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4162 
4163   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4164   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4165   for (proc=0,k=0; proc<size; proc++){
4166     if (!len_s[proc]) continue;
4167     /* form outgoing message for i-structure:
4168          buf_si[0]:                 nrows to be sent
4169                [1:nrows]:           row index (global)
4170                [nrows+1:2*nrows+1]: i-structure index
4171     */
4172     /*-------------------------------------------*/
4173     nrows = len_si[proc]/2 - 1;
4174     buf_si_i    = buf_si + nrows+1;
4175     buf_si[0]   = nrows;
4176     buf_si_i[0] = 0;
4177     nrows = 0;
4178     for (i=owners[proc]; i<owners[proc+1]; i++){
4179       anzi = ai[i+1] - ai[i];
4180       if (anzi) {
4181         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4182         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4183         nrows++;
4184       }
4185     }
4186     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4187     k++;
4188     buf_si += len_si[proc];
4189   }
4190 
4191   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4192   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4193 
4194   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4195   for (i=0; i<merge->nrecv; i++){
4196     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4197   }
4198 
4199   ierr = PetscFree(len_si);CHKERRQ(ierr);
4200   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4201   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4202   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4203   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4204   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4205   ierr = PetscFree(status);CHKERRQ(ierr);
4206 
4207   /* compute a local seq matrix in each processor */
4208   /*----------------------------------------------*/
4209   /* allocate bi array and free space for accumulating nonzero column info */
4210   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4211   bi[0] = 0;
4212 
4213   /* create and initialize a linked list */
4214   nlnk = N+1;
4215   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4216 
4217   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4219   len  = ai[owners[rank+1]] - ai[owners[rank]];
4220   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4221   current_space = free_space;
4222 
4223   /* determine symbolic info for each local row */
4224   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4225   nextrow = buf_ri_k + merge->nrecv;
4226   nextai  = nextrow + merge->nrecv;
4227   for (k=0; k<merge->nrecv; k++){
4228     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4229     nrows = *buf_ri_k[k];
4230     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4231     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4232   }
4233 
4234   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4235   len = 0;
4236   for (i=0;i<m;i++) {
4237     bnzi   = 0;
4238     /* add local non-zero cols of this proc's seqmat into lnk */
4239     arow   = owners[rank] + i;
4240     anzi   = ai[arow+1] - ai[arow];
4241     aj     = a->j + ai[arow];
4242     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4243     bnzi += nlnk;
4244     /* add received col data into lnk */
4245     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4246       if (i == *nextrow[k]) { /* i-th row */
4247         anzi = *(nextai[k]+1) - *nextai[k];
4248         aj   = buf_rj[k] + *nextai[k];
4249         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4250         bnzi += nlnk;
4251         nextrow[k]++; nextai[k]++;
4252       }
4253     }
4254     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4255 
4256     /* if free space is not available, make more free space */
4257     if (current_space->local_remaining<bnzi) {
4258       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4259       nspacedouble++;
4260     }
4261     /* copy data into free space, then initialize lnk */
4262     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4263     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4264 
4265     current_space->array           += bnzi;
4266     current_space->local_used      += bnzi;
4267     current_space->local_remaining -= bnzi;
4268 
4269     bi[i+1] = bi[i] + bnzi;
4270   }
4271 
4272   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4273 
4274   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4275   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4276   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4277 
4278   /* create symbolic parallel matrix B_mpi */
4279   /*---------------------------------------*/
4280   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4281   if (n==PETSC_DECIDE) {
4282     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4283   } else {
4284     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4285   }
4286   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4287   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4288   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4289 
4290   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4291   B_mpi->assembled     = PETSC_FALSE;
4292   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4293   merge->bi            = bi;
4294   merge->bj            = bj;
4295   merge->buf_ri        = buf_ri;
4296   merge->buf_rj        = buf_rj;
4297   merge->coi           = PETSC_NULL;
4298   merge->coj           = PETSC_NULL;
4299   merge->owners_co     = PETSC_NULL;
4300 
4301   /* attach the supporting struct to B_mpi for reuse */
4302   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4303   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4304   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4305   *mpimat = B_mpi;
4306 
4307   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4308   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4309   PetscFunctionReturn(0);
4310 }
4311 
4312 #undef __FUNCT__
4313 #define __FUNCT__ "MatMerge_SeqsToMPI"
4314 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4315 {
4316   PetscErrorCode   ierr;
4317 
4318   PetscFunctionBegin;
4319   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4320   if (scall == MAT_INITIAL_MATRIX){
4321     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4322   }
4323   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4324   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4325   PetscFunctionReturn(0);
4326 }
4327 
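/*
   Example usage of MatMerge_SeqsToMPI() (a sketch, not from the PETSc documentation):
   each process supplies a SeqAIJ matrix seqmat whose row dimension is the global row
   count; the rows are summed into a single parallel MPIAIJ matrix with m local rows.

     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/
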
4328 #undef __FUNCT__
4329 #define __FUNCT__ "MatGetLocalMat"
4330 /*@
4331      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of a parallel MPIAIJ matrix (diagonal and off-diagonal parts combined)
4332 
4333     Not Collective
4334 
4335    Input Parameters:
4336 +    A - the matrix
4337 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4338 
4339    Output Parameter:
4340 .    A_loc - the local sequential matrix generated
4341 
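   Example usage (a sketch; assumes A is an assembled MPIAIJ matrix):
.vb
      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... use A_loc as an ordinary SeqAIJ matrix ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
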
4342     Level: developer
4343 
4344 @*/
4345 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4346 {
4347   PetscErrorCode  ierr;
4348   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4349   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4350   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4351   MatScalar       *aa=a->a,*ba=b->a,*cam;
4352   PetscScalar     *ca;
4353   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4354   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4355 
4356   PetscFunctionBegin;
4357   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4358   if (scall == MAT_INITIAL_MATRIX){
4359     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4360     ci[0] = 0;
4361     for (i=0; i<am; i++){
4362       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4363     }
4364     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4365     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4366     k = 0;
4367     for (i=0; i<am; i++) {
4368       ncols_o = bi[i+1] - bi[i];
4369       ncols_d = ai[i+1] - ai[i];
4370       /* off-diagonal portion of A */
4371       for (jo=0; jo<ncols_o; jo++) {
4372         col = cmap[*bj];
4373         if (col >= cstart) break;
4374         cj[k]   = col; bj++;
4375         ca[k++] = *ba++;
4376       }
4377       /* diagonal portion of A */
4378       for (j=0; j<ncols_d; j++) {
4379         cj[k]   = cstart + *aj++;
4380         ca[k++] = *aa++;
4381       }
4382       /* off-diagonal portion of A */
4383       for (j=jo; j<ncols_o; j++) {
4384         cj[k]   = cmap[*bj++];
4385         ca[k++] = *ba++;
4386       }
4387     }
4388     /* put together the new matrix */
4389     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4390     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4391     /* Since these are PETSc arrays, change flags to free them as necessary. */
4392     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4393     mat->free_a  = PETSC_TRUE;
4394     mat->free_ij = PETSC_TRUE;
4395     mat->nonew   = 0;
4396   } else if (scall == MAT_REUSE_MATRIX){
4397     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4398     ci = mat->i; cj = mat->j; cam = mat->a;
4399     for (i=0; i<am; i++) {
4400       /* off-diagonal portion of A */
4401       ncols_o = bi[i+1] - bi[i];
4402       for (jo=0; jo<ncols_o; jo++) {
4403         col = cmap[*bj];
4404         if (col >= cstart) break;
4405         *cam++ = *ba++; bj++;
4406       }
4407       /* diagonal portion of A */
4408       ncols_d = ai[i+1] - ai[i];
4409       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4410       /* off-diagonal portion of A */
4411       for (j=jo; j<ncols_o; j++) {
4412         *cam++ = *ba++; bj++;
4413       }
4414     }
4415   } else {
4416     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4417   }
4418 
4419   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4420   PetscFunctionReturn(0);
4421 }
4422 
4423 #undef __FUNCT__
4424 #define __FUNCT__ "MatGetLocalMatCondensed"
4425 /*@C
4426      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows of a parallel matrix, condensed to its NON-ZERO columns
4427 
4428     Not Collective
4429 
4430    Input Parameters:
4431 +    A - the matrix
4432 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4433 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4434 
4435    Output Parameter:
4436 .    A_loc - the local sequential matrix generated
4437 
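   Example usage (a sketch; PETSC_NULL for row and col takes all local rows and all
   columns containing nonzeros):
.vb
      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
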
4438     Level: developer
4439 
4440 @*/
4441 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4442 {
4443   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4444   PetscErrorCode    ierr;
4445   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4446   IS                isrowa,iscola;
4447   Mat               *aloc;
4448 
4449   PetscFunctionBegin;
4450   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4451   if (!row){
4452     start = A->rmap->rstart; end = A->rmap->rend;
4453     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4454   } else {
4455     isrowa = *row;
4456   }
4457   if (!col){
4458     start = A->cmap->rstart;
4459     cmap  = a->garray;
4460     nzA   = a->A->cmap->n;
4461     nzB   = a->B->cmap->n;
4462     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4463     ncols = 0;
4464     for (i=0; i<nzB; i++) {
4465       if (cmap[i] < start) idx[ncols++] = cmap[i];
4466       else break;
4467     }
4468     imark = i;
4469     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4470     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4471     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4472     ierr = PetscFree(idx);CHKERRQ(ierr);
4473   } else {
4474     iscola = *col;
4475   }
4476   if (scall != MAT_INITIAL_MATRIX){
4477     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4478     aloc[0] = *A_loc;
4479   }
4480   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4481   *A_loc = aloc[0];
4482   ierr = PetscFree(aloc);CHKERRQ(ierr);
4483   if (!row){
4484     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4485   }
4486   if (!col){
4487     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4488   }
4489   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4490   PetscFunctionReturn(0);
4491 }
4492 
4493 #undef __FUNCT__
4494 #define __FUNCT__ "MatGetBrowsOfAcols"
4495 /*@C
4496     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4497 
4498     Collective on Mat
4499 
4500    Input Parameters:
4501 +    A,B - the matrices in mpiaij format
4502 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4503 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4504 
4505    Output Parameters:
4506 +    rowb, colb - index sets of rows and columns of B to extract
4507 .    brstart - index of the first row in B_seq that comes from B's own local rows
4508 -    B_seq - the sequential matrix generated
4509 
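   Example usage (a sketch; the index sets and B_seq created by the first call are
   passed back in for MAT_REUSE_MATRIX):
.vb
      IS       isrow,iscol;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&isrow,&iscol,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&isrow,&iscol,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(isrow);CHKERRQ(ierr);
      ierr = ISDestroy(iscol);CHKERRQ(ierr);
.ve
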
4510     Level: developer
4511 
4512 @*/
4513 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4514 {
4515   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4516   PetscErrorCode    ierr;
4517   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4518   IS                isrowb,iscolb;
4519   Mat               *bseq;
4520 
4521   PetscFunctionBegin;
4522   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4523     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4524   }
4525   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4526 
4527   if (scall == MAT_INITIAL_MATRIX){
4528     start = A->cmap->rstart;
4529     cmap  = a->garray;
4530     nzA   = a->A->cmap->n;
4531     nzB   = a->B->cmap->n;
4532     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4533     ncols = 0;
4534     for (i=0; i<nzB; i++) {  /* row < local row index */
4535       if (cmap[i] < start) idx[ncols++] = cmap[i];
4536       else break;
4537     }
4538     imark = i;
4539     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4540     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4541     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4542     ierr = PetscFree(idx);CHKERRQ(ierr);
4543     *brstart = imark;
4544     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4545   } else {
4546     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4547     isrowb = *rowb; iscolb = *colb;
4548     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4549     bseq[0] = *B_seq;
4550   }
4551   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4552   *B_seq = bseq[0];
4553   ierr = PetscFree(bseq);CHKERRQ(ierr);
4554   if (!rowb){
4555     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4556   } else {
4557     *rowb = isrowb;
4558   }
4559   if (!colb){
4560     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4561   } else {
4562     *colb = iscolb;
4563   }
4564   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4565   PetscFunctionReturn(0);
4566 }
4567 
4568 #undef __FUNCT__
4569 #define __FUNCT__ "MatGetBrowsOfAoCols"
4570 /*@C
4571     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4572     of the OFF-DIAGONAL portion of the local part of A
4573 
4574     Collective on Mat
4575 
4576    Input Parameters:
4577 +    A,B - the matrices in mpiaij format
4578 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4579 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4580 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4581 
4582    Output Parameter:
4583 .    B_oth - the sequential matrix generated
4584 
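   Example usage (a sketch; the startsj and bufa buffers created by the first call
   are passed back in for MAT_REUSE_MATRIX):
.vb
      PetscInt  *startsj;
      MatScalar *bufa;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
      ierr = PetscFree(startsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
.ve
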
4585     Level: developer
4586 
4587 @*/
4588 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4589 {
4590   VecScatter_MPI_General *gen_to,*gen_from;
4591   PetscErrorCode         ierr;
4592   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4593   Mat_SeqAIJ             *b_oth;
4594   VecScatter             ctx=a->Mvctx;
4595   MPI_Comm               comm=((PetscObject)ctx)->comm;
4596   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4597   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4598   PetscScalar            *rvalues,*svalues;
4599   MatScalar              *b_otha,*bufa,*bufA;
4600   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4601   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4602   MPI_Status             *sstatus,rstatus;
4603   PetscMPIInt            jj;
4604   PetscInt               *cols,sbs,rbs;
4605   PetscScalar            *vals;
4606 
4607   PetscFunctionBegin;
4608   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4609     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4610   }
4611   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4612   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4613 
4614   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4615   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4616   rvalues  = gen_from->values; /* scatter recv buffer, reused here for the lengths of incoming rows */
4617   svalues  = gen_to->values;   /* scatter send buffer, reused here for the lengths of outgoing rows */
4618   nrecvs   = gen_from->n;
4619   nsends   = gen_to->n;
4620 
4621   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4622   srow     = gen_to->indices;   /* local row index to be sent */
4623   sstarts  = gen_to->starts;
4624   sprocs   = gen_to->procs;
4625   sstatus  = gen_to->sstatus;
4626   sbs      = gen_to->bs;
4627   rstarts  = gen_from->starts;
4628   rprocs   = gen_from->procs;
4629   rbs      = gen_from->bs;
4630 
4631   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4632   if (scall == MAT_INITIAL_MATRIX){
4633     /* i-array */
4634     /*---------*/
4635     /*  post receives */
4636     for (i=0; i<nrecvs; i++){
4637       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4638       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4639       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4640     }
4641 
4642     /* pack the outgoing message */
4643     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4644     rstartsj = sstartsj + nsends +1;
4645     sstartsj[0] = 0;  rstartsj[0] = 0;
4646     len = 0; /* total length of j or a array to be sent */
4647     k = 0;
4648     for (i=0; i<nsends; i++){
4649       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4650       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4651       for (j=0; j<nrows; j++) {
4652         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4653         for (l=0; l<sbs; l++){
4654           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4655           rowlen[j*sbs+l] = ncols;
4656           len += ncols;
4657           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4658         }
4659         k++;
4660       }
4661       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4662       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4663     }
4664     /* recvs and sends of i-array are completed */
4665     i = nrecvs;
4666     while (i--) {
4667       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4668     }
4669     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4670 
4671     /* allocate buffers for sending j and a arrays */
4672     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4673     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4674 
4675     /* create i-array of B_oth */
4676     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4677     b_othi[0] = 0;
4678     len = 0; /* total length of j or a array to be received */
4679     k = 0;
4680     for (i=0; i<nrecvs; i++){
4681       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4682       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4683       for (j=0; j<nrows; j++) {
4684         b_othi[k+1] = b_othi[k] + rowlen[j];
4685         len += rowlen[j]; k++;
4686       }
4687       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4688     }
4689 
4690     /* allocate space for j and a arrays of B_oth */
4691     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4692     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4693 
4694     /* j-array */
4695     /*---------*/
4696     /*  post receives of j-array */
4697     for (i=0; i<nrecvs; i++){
4698       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4699       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4700     }
4701 
4702     /* pack the outgoing message j-array */
4703     k = 0;
4704     for (i=0; i<nsends; i++){
4705       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4706       bufJ = bufj+sstartsj[i];
4707       for (j=0; j<nrows; j++) {
4708         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4709         for (ll=0; ll<sbs; ll++){
4710           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4711           for (l=0; l<ncols; l++){
4712             *bufJ++ = cols[l];
4713           }
4714           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4715         }
4716       }
4717       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4718     }
4719 
4720     /* recvs and sends of j-array are completed */
4721     i = nrecvs;
4722     while (i--) {
4723       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4724     }
4725     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4726   } else if (scall == MAT_REUSE_MATRIX){
4727     sstartsj = *startsj;
4728     rstartsj = sstartsj + nsends +1;
4729     bufa     = *bufa_ptr;
4730     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4731     b_otha   = b_oth->a;
4732   } else {
4733     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4734   }
4735 
4736   /* a-array */
4737   /*---------*/
4738   /*  post receives of a-array */
4739   for (i=0; i<nrecvs; i++){
4740     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4741     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4742   }
4743 
4744   /* pack the outgoing message a-array */
4745   k = 0;
4746   for (i=0; i<nsends; i++){
4747     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4748     bufA = bufa+sstartsj[i];
4749     for (j=0; j<nrows; j++) {
4750       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4751       for (ll=0; ll<sbs; ll++){
4752         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4753         for (l=0; l<ncols; l++){
4754           *bufA++ = vals[l];
4755         }
4756         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4757       }
4758     }
4759     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4760   }
4761   /* recvs and sends of a-array are completed */
4762   i = nrecvs;
4763   while (i--) {
4764     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4765   }
4766   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4767   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4768 
4769   if (scall == MAT_INITIAL_MATRIX){
4770     /* put together the new matrix */
4771     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4772 
4773     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4774     /* Since these are PETSc arrays, change flags to free them as necessary. */
4775     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4776     b_oth->free_a  = PETSC_TRUE;
4777     b_oth->free_ij = PETSC_TRUE;
4778     b_oth->nonew   = 0;
4779 
4780     ierr = PetscFree(bufj);CHKERRQ(ierr);
4781     if (!startsj || !bufa_ptr){
4782       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4783       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the local send buffer since the caller did not ask to keep it */
4784     } else {
4785       *startsj  = sstartsj;
4786       *bufa_ptr = bufa;
4787     }
4788   }
4789   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4790   PetscFunctionReturn(0);
4791 }
4792 
4793 #undef __FUNCT__
4794 #define __FUNCT__ "MatGetCommunicationStructs"
4795 /*@C
4796   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4797 
4798   Not Collective
4799 
4800   Input Parameter:
4801 . A - The matrix in mpiaij format
4802 
4803   Output Parameters:
4804 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4805 . colmap - A map from global column index to local index into lvec
4806 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4807 
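  Example usage (a sketch; assumes a build without PETSC_USE_CTABLE, where colmap
  is a PetscInt array):
.vb
     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
  The returned objects are owned by the matrix and must not be destroyed by the caller.
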
4808   Level: developer
4809 
4810 @*/
4811 #if defined (PETSC_USE_CTABLE)
4812 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4813 #else
4814 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4815 #endif
4816 {
4817   Mat_MPIAIJ *a;
4818 
4819   PetscFunctionBegin;
4820   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4821   PetscValidPointer(lvec, 2);
4822   PetscValidPointer(colmap, 3);
4823   PetscValidPointer(multScatter, 4);
4824   a = (Mat_MPIAIJ *) A->data;
4825   if (lvec) *lvec = a->lvec;
4826   if (colmap) *colmap = a->colmap;
4827   if (multScatter) *multScatter = a->Mvctx;
4828   PetscFunctionReturn(0);
4829 }
4830 
4831 EXTERN_C_BEGIN
4832 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4833 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4834 EXTERN_C_END
4835 
4836 #include "src/mat/impls/dense/mpi/mpidense.h"
4837 
4838 #undef __FUNCT__
4839 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4840 /*
4841     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product A*B directly is untenable
4842 
4843                n                       p                          p
4844         (              )       (              )         (                  )
4845       m (      A       )  *  n (       B      )   =   m (         C        )
4846         (              )       (              )         (                  )
4847 
4848 */
4849 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4850 {
4851   PetscErrorCode     ierr;
4852   Mat                At,Bt,Ct;
4853 
4854   PetscFunctionBegin;
4855   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4856   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4857   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4858   ierr = MatDestroy(At);CHKERRQ(ierr);
4859   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4860   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4861   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4862   PetscFunctionReturn(0);
4863 }
4864 
4865 #undef __FUNCT__
4866 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4867 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4868 {
4869   PetscErrorCode ierr;
4870   PetscInt       m=A->rmap->n,n=B->cmap->n;
4871   Mat            Cmat;
4872 
4873   PetscFunctionBegin;
4874   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4875   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4876   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4877   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4878   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4879   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4880   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4881   *C   = Cmat;
4882   PetscFunctionReturn(0);
4883 }
4884 
4885 /* ----------------------------------------------------------------*/
4886 #undef __FUNCT__
4887 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4888 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4889 {
4890   PetscErrorCode ierr;
4891 
4892   PetscFunctionBegin;
4893   if (scall == MAT_INITIAL_MATRIX){
4894     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4895   }
4896   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4897   PetscFunctionReturn(0);
4898 }
4899 
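/*
   Example (a sketch): with A an MPIDense matrix and B an MPIAIJ matrix whose row
   layout matches A's column layout, the routine above is reached through the
   generic interface:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/
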
4900 EXTERN_C_BEGIN
4901 #if defined(PETSC_HAVE_MUMPS)
4902 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4903 #endif
4904 #if defined(PETSC_HAVE_SUPERLU_DIST)
4905 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4906 #endif
4907 #if defined(PETSC_HAVE_SPOOLES)
4908 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4909 #endif
4910 EXTERN_C_END
4911 
4912 /*MC
4913    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4914 
4915    Options Database Keys:
4916 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4917 
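   Example usage (a sketch; dnz and onz are estimated nonzeros per row in the
   diagonal and off-diagonal blocks):
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,dnz,PETSC_NULL,onz,PETSC_NULL);CHKERRQ(ierr);
.ve
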
4918   Level: beginner
4919 
4920 .seealso: MatCreateMPIAIJ()
4921 M*/
4922 
4923 EXTERN_C_BEGIN
4924 #undef __FUNCT__
4925 #define __FUNCT__ "MatCreate_MPIAIJ"
4926 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4927 {
4928   Mat_MPIAIJ     *b;
4929   PetscErrorCode ierr;
4930   PetscMPIInt    size;
4931 
4932   PetscFunctionBegin;
4933   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4934 
4935   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4936   B->data         = (void*)b;
4937   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4938   B->rmap->bs      = 1;
4939   B->assembled    = PETSC_FALSE;
4940   B->mapping      = 0;
4941 
4942   B->insertmode      = NOT_SET_VALUES;
4943   b->size            = size;
4944   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4945 
4946   /* build cache for off array entries formed */
4947   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4948   b->donotstash  = PETSC_FALSE;
4949   b->colmap      = 0;
4950   b->garray      = 0;
4951   b->roworiented = PETSC_TRUE;
4952 
4953   /* stuff used for matrix vector multiply */
4954   b->lvec      = PETSC_NULL;
4955   b->Mvctx     = PETSC_NULL;
4956 
4957   /* stuff for MatGetRow() */
4958   b->rowindices   = 0;
4959   b->rowvalues    = 0;
4960   b->getrowactive = PETSC_FALSE;
4961 
4962 #if defined(PETSC_HAVE_SPOOLES)
4963   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
4964                                      "MatGetFactor_mpiaij_spooles",
4965                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
4966 #endif
4967 #if defined(PETSC_HAVE_MUMPS)
4968   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
4969                                      "MatGetFactor_mpiaij_mumps",
4970                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
4971 #endif
4972 #if defined(PETSC_HAVE_SUPERLU_DIST)
4973   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
4974                                      "MatGetFactor_mpiaij_superlu_dist",
4975                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
4976 #endif
4977   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4978                                      "MatStoreValues_MPIAIJ",
4979                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4980   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4981                                      "MatRetrieveValues_MPIAIJ",
4982                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4983   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4984 				     "MatGetDiagonalBlock_MPIAIJ",
4985                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4986   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4987 				     "MatIsTranspose_MPIAIJ",
4988 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4989   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4990 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4991 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4992   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4993 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4994 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4995   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4996 				     "MatDiagonalScaleLocal_MPIAIJ",
4997 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4998   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4999                                      "MatConvert_MPIAIJ_MPICSRPERM",
5000                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5001   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5002                                      "MatConvert_MPIAIJ_MPICRL",
5003                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5004   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5005                                      "MatMatMult_MPIDense_MPIAIJ",
5006                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5007   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5008                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5009                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5010   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5011                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5012                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5013   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5014   PetscFunctionReturn(0);
5015 }
5016 EXTERN_C_END
5017 
5018 #undef __FUNCT__
5019 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5020 /*@
5021      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5022          and "off-diagonal" part of the matrix in CSR format.
5023 
5024    Collective on MPI_Comm
5025 
5026    Input Parameters:
5027 +  comm - MPI communicator
5028 .  m - number of local rows (Cannot be PETSC_DECIDE)
5029 .  n - This value should be the same as the local size used in creating the
5030        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5031        calculated if N is given). For square matrices n is almost always m.
5032 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5033 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5034 .   i - row indices for "diagonal" portion of matrix
5035 .   j - column indices
5036 .   a - matrix values
5037 .   oi - row indices for "off-diagonal" portion of matrix
5038 .   oj - column indices
5039 -   oa - matrix values
5040 
5041    Output Parameter:
5042 .   mat - the matrix
5043 
5044    Level: advanced
5045 
5046    Notes:
5047        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5048 
5049        The i and j indices are 0 based
5050 
5051        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5052 
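       Example usage (a sketch; i,j,a and oi,oj,oa are this process's CSR arrays for the
       diagonal and off-diagonal blocks and must remain valid for the life of the matrix):
.vb
      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve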
5053 
5054 .keywords: matrix, aij, compressed row, sparse, parallel
5055 
5056 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5057           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5058 @*/
5059 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5060 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5061 {
5062   PetscErrorCode ierr;
5063   Mat_MPIAIJ     *maij;
5064 
5065   PetscFunctionBegin;
5066   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5067   if (i[0]) {
5068     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5069   }
5070   if (oi[0]) {
5071     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5072   }
5073   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5074   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5075   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5076   maij = (Mat_MPIAIJ*) (*mat)->data;
5077   maij->donotstash     = PETSC_TRUE;
5078   (*mat)->preallocated = PETSC_TRUE;
5079 
5080   (*mat)->rmap->bs = (*mat)->cmap->bs = 1;
5081   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5082   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5083 
5084   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5085   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5086 
5087   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5088   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5089   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5090   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5091 
5092   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5093   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5094   PetscFunctionReturn(0);
5095 }
5096 
5097 /*
5098     Special version for direct calls from Fortran
5099 */
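/*
   Example (a hypothetical Fortran usage sketch): when PETSc is configured so that
   Fortran calls MatSetValues() directly, a statement such as

     call MatSetValues(A,1,row,1,col,value,ADD_VALUES,ierr)

   on an MPIAIJ matrix arrives at matsetvaluesmpiaij_() below without going through
   the usual C function-pointer dispatch.
*/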
5100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5101 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5103 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5104 #endif
5105 
5106 /* Change these macros so can be used in void function */
5107 #undef CHKERRQ
5108 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5109 #undef SETERRQ2
5110 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5111 #undef SETERRQ
5112 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5113 
5114 EXTERN_C_BEGIN
5115 #undef __FUNCT__
5116 #define __FUNCT__ "matsetvaluesmpiaij_"
5117 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5118 {
5119   Mat             mat = *mmat;
5120   PetscInt        m = *mm, n = *mn;
5121   InsertMode      addv = *maddv;
5122   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5123   PetscScalar     value;
5124   PetscErrorCode  ierr;
5125 
5126   MatPreallocated(mat);
5127   if (mat->insertmode == NOT_SET_VALUES) {
5128     mat->insertmode = addv;
5129   }
5130 #if defined(PETSC_USE_DEBUG)
5131   else if (mat->insertmode != addv) {
5132     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5133   }
5134 #endif
5135   {
5136   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5137   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5138   PetscTruth      roworiented = aij->roworiented;
5139 
5140   /* Some Variables required in the macro */
5141   Mat             A = aij->A;
5142   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5143   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5144   MatScalar       *aa = a->a;
5145   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5146   Mat             B = aij->B;
5147   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5148   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5149   MatScalar       *ba = b->a;
5150 
5151   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5152   PetscInt        nonew = a->nonew;
5153   MatScalar       *ap1,*ap2;
5154 
5155   PetscFunctionBegin;
5156   for (i=0; i<m; i++) {
5157     if (im[i] < 0) continue;
5158 #if defined(PETSC_USE_DEBUG)
5159     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5160 #endif
5161     if (im[i] >= rstart && im[i] < rend) {
5162       row      = im[i] - rstart;
5163       lastcol1 = -1;
5164       rp1      = aj + ai[row];
5165       ap1      = aa + ai[row];
5166       rmax1    = aimax[row];
5167       nrow1    = ailen[row];
5168       low1     = 0;
5169       high1    = nrow1;
5170       lastcol2 = -1;
5171       rp2      = bj + bi[row];
5172       ap2      = ba + bi[row];
5173       rmax2    = bimax[row];
5174       nrow2    = bilen[row];
5175       low2     = 0;
5176       high2    = nrow2;
5177 
5178       for (j=0; j<n; j++) {
5179         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5180         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5181         if (in[j] >= cstart && in[j] < cend){
5182           col = in[j] - cstart;
5183           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5184         } else if (in[j] < 0) continue;
5185 #if defined(PETSC_USE_DEBUG)
5186         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5187 #endif
5188         else {
5189           if (mat->was_assembled) {
5190             if (!aij->colmap) {
5191               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5192             }
5193 #if defined (PETSC_USE_CTABLE)
5194             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5195 	    col--;
5196 #else
5197             col = aij->colmap[in[j]] - 1;
5198 #endif
5199             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5200               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5201               col =  in[j];
5202               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5203               B = aij->B;
5204               b = (Mat_SeqAIJ*)B->data;
5205               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5206               rp2      = bj + bi[row];
5207               ap2      = ba + bi[row];
5208               rmax2    = bimax[row];
5209               nrow2    = bilen[row];
5210               low2     = 0;
5211               high2    = nrow2;
5212               bm       = aij->B->rmap->n;
5213               ba = b->a;
5214             }
5215           } else col = in[j];
5216           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5217         }
5218       }
5219     } else {
5220       if (!aij->donotstash) {
5221         if (roworiented) {
5222           if (ignorezeroentries && v[i*n] == 0.0) continue;
5223           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5224         } else {
5225           if (ignorezeroentries && v[i] == 0.0) continue;
5226           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5227         }
5228       }
5229     }
5230   }}
5231   PetscFunctionReturnVoid();
5232 }
5233 EXTERN_C_END
5234 
5235