xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6abce2d9d736c45116d2d0cb3c19d818c8c1a66b)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "../src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send each process its row lengths (rank 0 keeps its own) */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* count the off-diagonal nonzeros per row (olens) and those left of the diagonal block (ld) */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive the number of nonzeros, then the column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* count the off-diagonal nonzeros per row (olens) and those left of the diagonal block (ld) */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable, at a
180 slightly higher hash-table lookup cost; without it it is not scalable (each
181 process stores an order-N integer array) but access is fast.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
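/*
   Illustrative sketch (not part of the original source) of how the colmap built
   above is consulted, mirroring the lookups in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ(): map a global column number gcol to the local column
   index in the off-diagonal block B; a result of -1 means gcol is absent.
*/
#if 0
static PetscErrorCode ColmapLookupExample(Mat_MPIAIJ *aij,PetscInt gcol,PetscInt *lcol)
{
#if defined (PETSC_USE_CTABLE)
  PetscErrorCode ierr;
#endif

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr  = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                       /* entries are stored shifted by one so 0 can mean "absent" */
#else
  *lcol = aij->colmap[gcol] - 1;   /* dense array indexed by global column number */
#endif
  PetscFunctionReturn(0);
}
#endif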
206 
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217     for (_i=low1; _i<high1; _i++) { \
218       if (rp1[_i] > col) break; \
219       if (rp1[_i] == col) { \
220         if (addv == ADD_VALUES) ap1[_i] += value; \
221         else                    ap1[_i]  = value; \
222         goto a_noinsert; \
223       } \
224     } \
225     if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
226     if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
227     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228     MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229     N = nrow1++ - 1; a->nz++; high1++; \
230     /* shift up all the later entries in this row */ \
231     for (ii=N; ii>=_i; ii--) { \
232       rp1[ii+1] = rp1[ii]; \
233       ap1[ii+1] = ap1[ii]; \
234     } \
235     rp1[_i] = col; \
236     ap1[_i] = value; \
237     a_noinsert: ; \
238     ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
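/*
   Minimal standalone sketch (not part of the original source) of the insertion
   scheme the two macros above implement: a binary search narrows the window in
   a column-sorted row, a short linear scan updates an existing entry or finds
   the insertion point, and the tail of the row is shifted up by one slot. The
   macros additionally cache the search window between calls (lastcol/low/high)
   and handle reallocation (MatSeqXAIJReallocateAIJ); both are omitted here.
*/
#if 0
static void RowInsertExample(PetscInt *rp,MatScalar *ap,PetscInt *nrow,PetscInt col,MatScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i,ii;

  while (high - low > 5) {            /* binary search phase */
    t = (low + high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {          /* linear scan phase */
    if (rp[i] > col) break;
    if (rp[i] == col) {               /* entry exists: add or overwrite */
      if (addv == ADD_VALUES) ap[i] += value;
      else                    ap[i]  = value;
      return;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {     /* shift the tail up to open slot i */
    rp[ii+1] = rp[ii];
    ap[ii+1] = ap[ii];
  }
  rp[i] = col;
  ap[i] = value;
  (*nrow)++;
}
#endif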
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
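/*
   Illustrative sketch (not part of the original source) of the input contract
   of MatSetValuesRow_MPIAIJ() above: v[] holds every stored entry of one local
   row ordered by global column number, which is exactly how the three
   PetscMemcpy() calls split it (nl, nd, nr are hypothetical counts).
*/
#if 0
static PetscErrorCode SetRowExample(Mat A,PetscInt grow,PetscInt nl,PetscInt nd,PetscInt nr,const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* v[0 .. nl-1]           : B entries with global column left of the diagonal block  */
  /* v[nl .. nl+nd-1]       : A (diagonal block) entries                               */
  /* v[nl+nd .. nl+nd+nr-1] : B entries with global column right of the diagonal block */
  ierr = MatSetValuesRow(A,grow,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif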
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
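/*
   Illustrative usage sketch (not part of the original source): entries whose
   row is owned by another process are cached in mat->stash by the code above
   and only travel to their owner during assembly, so a caller sets values
   freely and assembles once at the end.
*/
#if 0
static PetscErrorCode FillExample(Mat mat,PetscInt row,PetscInt col,PetscScalar v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr); /* row may be remote */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);      /* stash is scattered here */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif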
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
452 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscErrorCode ierr;
456   PetscInt       nstash,reallocs;
457   InsertMode     addv;
458 
459   PetscFunctionBegin;
460   if (aij->donotstash) {
461     PetscFunctionReturn(0);
462   }
463 
464   /* make sure all processes are in the same mode: either INSERT_VALUES or ADD_VALUES */
465   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
466   if (addv == (ADD_VALUES|INSERT_VALUES)) {
467     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
468   }
469   mat->insertmode = addv; /* in case this processor had no cache */
470 
471   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
472   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
473   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
479 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
483   PetscErrorCode ierr;
484   PetscMPIInt    n;
485   PetscInt       i,j,rstart,ncols,flg;
486   PetscInt       *row,*col;
487   PetscTruth     other_disassembled;
488   PetscScalar    *val;
489   InsertMode     addv = mat->insertmode;
490 
491   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
492   PetscFunctionBegin;
493   if (!aij->donotstash) {
494     while (1) {
495       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
496       if (!flg) break;
497 
498       for (i=0; i<n;) {
499         /* Now identify the consecutive vals belonging to the same row */
500         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
501         if (j < n) ncols = j-i;
502         else       ncols = n-i;
503         /* Now assemble all these values with a single function call */
504         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
505         i = j;
506       }
507     }
508     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
509   }
510   a->compressedrow.use     = PETSC_FALSE;
511   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
512   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
513 
514   /* determine if any process has disassembled; if so we must
515      also disassemble ourselves, in order that we may reassemble. */
516   /*
517      if nonzero structure of submatrix B cannot change then we know that
518      no processor disassembled thus we can skip this stuff
519   */
520   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
521     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
522     if (mat->was_assembled && !other_disassembled) {
523       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
524     }
525   }
526   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
527     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
528   }
529   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
530   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
531   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
532   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
533 
534   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
535   aij->rowvalues = 0;
536 
537   /* used by MatAXPY() */
538   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
539   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
540 
541   PetscFunctionReturn(0);
542 }
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
546 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
547 {
548   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
549   PetscErrorCode ierr;
550 
551   PetscFunctionBegin;
552   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
553   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
554   PetscFunctionReturn(0);
555 }
556 
557 #undef __FUNCT__
558 #define __FUNCT__ "MatZeroRows_MPIAIJ"
559 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
560 {
561   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
562   PetscErrorCode ierr;
563   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
564   PetscInt       i,*owners = A->rmap->range;
565   PetscInt       *nprocs,j,idx,nsends,row;
566   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
567   PetscInt       *rvalues,count,base,slen,*source;
568   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
569   MPI_Comm       comm = ((PetscObject)A)->comm;
570   MPI_Request    *send_waits,*recv_waits;
571   MPI_Status     recv_status,*send_status;
572 #if defined(PETSC_USE_DEBUG)
573   PetscTruth     found = PETSC_FALSE;
574 #endif
575 
576   PetscFunctionBegin;
577   /*  first count number of contributors to each processor */
578   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
579   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
580   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
581   j = 0;
582   for (i=0; i<N; i++) {
583     if (lastidx > (idx = rows[i])) j = 0;
584     lastidx = idx;
585     for (; j<size; j++) {
586       if (idx >= owners[j] && idx < owners[j+1]) {
587         nprocs[2*j]++;
588         nprocs[2*j+1] = 1;
589         owner[i] = j;
590 #if defined(PETSC_USE_DEBUG)
591         found = PETSC_TRUE;
592 #endif
593         break;
594       }
595     }
596 #if defined(PETSC_USE_DEBUG)
597     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
598     found = PETSC_FALSE;
599 #endif
600   }
601   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
602 
603   /* inform other processors of number of messages and max length*/
604   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
605 
606   /* post receives:   */
607   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
608   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
609   for (i=0; i<nrecvs; i++) {
610     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
611   }
612 
613   /* do sends:
614       1) starts[i] gives the starting index in svalues for stuff going to
615          the ith processor
616   */
617   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
618   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
619   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
620   starts[0] = 0;
621   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
622   for (i=0; i<N; i++) {
623     svalues[starts[owner[i]]++] = rows[i];
624   }
625 
626   starts[0] = 0;
627   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
628   count = 0;
629   for (i=0; i<size; i++) {
630     if (nprocs[2*i+1]) {
631       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
632     }
633   }
634   ierr = PetscFree(starts);CHKERRQ(ierr);
635 
636   base = owners[rank];
637 
638   /*  wait on receives */
639   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
640   source = lens + nrecvs;
641   count  = nrecvs; slen = 0;
642   while (count) {
643     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
644     /* unpack receives into our local space */
645     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
646     source[imdex]  = recv_status.MPI_SOURCE;
647     lens[imdex]    = n;
648     slen          += n;
649     count--;
650   }
651   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
652 
653   /* move the data into the send scatter */
654   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
655   count = 0;
656   for (i=0; i<nrecvs; i++) {
657     values = rvalues + i*nmax;
658     for (j=0; j<lens[i]; j++) {
659       lrows[count++] = values[j] - base;
660     }
661   }
662   ierr = PetscFree(rvalues);CHKERRQ(ierr);
663   ierr = PetscFree(lens);CHKERRQ(ierr);
664   ierr = PetscFree(owner);CHKERRQ(ierr);
665   ierr = PetscFree(nprocs);CHKERRQ(ierr);
666 
667   /* actually zap the local rows */
668   /*
669         Zero the required rows. If the "diagonal block" of the matrix
670      is square and the user wishes to set the diagonal we use separate
671      code so that MatSetValues() is not called for each diagonal entry,
672      since that could allocate new memory and the many mallocs would slow things down.
673 
674        Contributed by: Matthew Knepley
675   */
676   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
677   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
678   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
679     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
680   } else if (diag != 0.0) {
681     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
682     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
683       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
684 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
685     }
686     for (i = 0; i < slen; i++) {
687       row  = lrows[i] + rstart;
688       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
689     }
690     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
691     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
692   } else {
693     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
694   }
695   ierr = PetscFree(lrows);CHKERRQ(ierr);
696 
697   /* wait on sends */
698   if (nsends) {
699     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
700     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
701     ierr = PetscFree(send_status);CHKERRQ(ierr);
702   }
703   ierr = PetscFree(send_waits);CHKERRQ(ierr);
704   ierr = PetscFree(svalues);CHKERRQ(ierr);
705 
706   PetscFunctionReturn(0);
707 }
708 
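/*
   Illustrative usage sketch (not part of the original source): the routine
   above lets every process pass arbitrary global rows (not just locally owned
   ones); the row numbers are first shipped to their owners. A typical use is
   imposing Dirichlet boundary conditions while keeping a unit diagonal.
*/
#if 0
static PetscErrorCode ZeroBoundaryExample(Mat A,PetscInt nbc,const PetscInt bcrows[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroRows(A,nbc,bcrows,1.0);CHKERRQ(ierr); /* zero the rows, set 1.0 on the diagonal */
  PetscFunctionReturn(0);
}
#endif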
709 #undef __FUNCT__
710 #define __FUNCT__ "MatMult_MPIAIJ"
711 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
712 {
713   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
714   PetscErrorCode ierr;
715   PetscInt       nt;
716 
717   PetscFunctionBegin;
718   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
719   if (nt != A->cmap->n) {
720     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
721   }
722   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
723   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
724   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatMultAdd_MPIAIJ"
731 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
732 {
733   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
738   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
739   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
746 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
747 {
748   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750   PetscTruth     merged;
751 
752   PetscFunctionBegin;
753   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
754   /* do nondiagonal part */
755   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
756   if (!merged) {
757     /* send it on its way */
758     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
759     /* do local part */
760     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
761     /* receive remote parts: note this assumes the values are not actually */
762     /* added into yy until the VecScatterEnd() on the next line */
763     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
764   } else {
765     /* do local part */
766     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
767     /* send it on its way */
768     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
769     /* the values were actually received in the Begin() but we still must call this End() as a no-op */
770     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 EXTERN_C_BEGIN
776 #undef __FUNCT__
777 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
778 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
779 {
780   MPI_Comm       comm;
781   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
782   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
783   IS             Me,Notme;
784   PetscErrorCode ierr;
785   PetscInt       M,N,first,last,*notme,i;
786   PetscMPIInt    size;
787 
788   PetscFunctionBegin;
789 
790   /* Easy test: symmetric diagonal block */
791   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
792   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
793   if (!*f) PetscFunctionReturn(0);
794   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
795   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
796   if (size == 1) PetscFunctionReturn(0);
797 
798   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
799   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
800   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
801   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
802   for (i=0; i<first; i++) notme[i] = i;
803   for (i=last; i<M; i++) notme[i-last+first] = i;
804   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
805   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
806   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
807   Aoff = Aoffs[0];
808   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
809   Boff = Boffs[0];
810   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
811   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
812   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
813   ierr = ISDestroy(Me);CHKERRQ(ierr);
814   ierr = ISDestroy(Notme);CHKERRQ(ierr);
815 
816   PetscFunctionReturn(0);
817 }
818 EXTERN_C_END
819 
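/*
   Illustrative usage sketch (not part of the original source): test whether
   Bmat equals the transpose of Amat to within a tolerance via the two-stage
   check above (cheap diagonal-block test first, then the off-diagonal blocks).
*/
#if 0
static PetscErrorCode CheckTransposeExample(Mat Amat,Mat Bmat)
{
  PetscErrorCode ierr;
  PetscTruth     f;

  PetscFunctionBegin;
  ierr = MatIsTranspose(Amat,Bmat,1.e-10,&f);CHKERRQ(ierr);
  if (f) {ierr = PetscPrintf(((PetscObject)Amat)->comm,"Bmat is the transpose of Amat\n");CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
#endif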
820 #undef __FUNCT__
821 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
822 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
823 {
824   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
825   PetscErrorCode ierr;
826 
827   PetscFunctionBegin;
828   /* do nondiagonal part */
829   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
830   /* send it on its way */
831   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
832   /* do local part */
833   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
834   /* receive remote parts */
835   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
836   PetscFunctionReturn(0);
837 }
838 
839 /*
840   This only works correctly for square matrices where the subblock A->A is the
841    diagonal block
842 */
843 #undef __FUNCT__
844 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
845 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
846 {
847   PetscErrorCode ierr;
848   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
849 
850   PetscFunctionBegin;
851   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
852   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
853     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
854   }
855   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
856   PetscFunctionReturn(0);
857 }
858 
859 #undef __FUNCT__
860 #define __FUNCT__ "MatScale_MPIAIJ"
861 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
862 {
863   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
864   PetscErrorCode ierr;
865 
866   PetscFunctionBegin;
867   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
868   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
872 #undef __FUNCT__
873 #define __FUNCT__ "MatDestroy_MPIAIJ"
874 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
875 {
876   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
877   PetscErrorCode ierr;
878 
879   PetscFunctionBegin;
880 #if defined(PETSC_USE_LOG)
881   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
882 #endif
883   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
884   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
885   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
886 #if defined (PETSC_USE_CTABLE)
887   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
888 #else
889   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
890 #endif
891   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
892   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
893   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
894   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
895   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
896   ierr = PetscFree(aij);CHKERRQ(ierr);
897 
898   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
899   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
900   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
906   PetscFunctionReturn(0);
907 }
908 
909 #undef __FUNCT__
910 #define __FUNCT__ "MatView_MPIAIJ_Binary"
911 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
912 {
913   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
914   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
915   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
916   PetscErrorCode    ierr;
917   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
918   int               fd;
919   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
920   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
921   PetscScalar       *column_values;
922 
923   PetscFunctionBegin;
924   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
925   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
926   nz   = A->nz + B->nz;
927   if (!rank) {
928     header[0] = MAT_FILE_COOKIE;
929     header[1] = mat->rmap->N;
930     header[2] = mat->cmap->N;
931     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
932     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
933     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
934     /* get largest number of rows any processor has */
935     rlen = mat->rmap->n;
936     range = mat->rmap->range;
937     for (i=1; i<size; i++) {
938       rlen = PetscMax(rlen,range[i+1] - range[i]);
939     }
940   } else {
941     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
942     rlen = mat->rmap->n;
943   }
944 
945   /* load up the local row counts */
946   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
947   for (i=0; i<mat->rmap->n; i++) {
948     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
949   }
950 
951   /* store the row lengths to the file */
952   if (!rank) {
953     MPI_Status status;
954     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
955     for (i=1; i<size; i++) {
956       rlen = range[i+1] - range[i];
957       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
958       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
959     }
960   } else {
961     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
962   }
963   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
964 
965   /* load up the local column indices */
966   nzmax = nz; /* process 0 needs as much space as the largest process needs */
967   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
968   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
969   cnt  = 0;
970   for (i=0; i<mat->rmap->n; i++) {
971     for (j=B->i[i]; j<B->i[i+1]; j++) {
972       if ( (col = garray[B->j[j]]) > cstart) break;
973       column_indices[cnt++] = col;
974     }
975     for (k=A->i[i]; k<A->i[i+1]; k++) {
976       column_indices[cnt++] = A->j[k] + cstart;
977     }
978     for (; j<B->i[i+1]; j++) {
979       column_indices[cnt++] = garray[B->j[j]];
980     }
981   }
982   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
983 
984   /* store the column indices to the file */
985   if (!rank) {
986     MPI_Status status;
987     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
988     for (i=1; i<size; i++) {
989       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
990       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
991       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
993     }
994   } else {
995     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
996     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
997   }
998   ierr = PetscFree(column_indices);CHKERRQ(ierr);
999 
1000   /* load up the local column values */
1001   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1002   cnt  = 0;
1003   for (i=0; i<mat->rmap->n; i++) {
1004     for (j=B->i[i]; j<B->i[i+1]; j++) {
1005       if ( garray[B->j[j]] > cstart) break;
1006       column_values[cnt++] = B->a[j];
1007     }
1008     for (k=A->i[i]; k<A->i[i+1]; k++) {
1009       column_values[cnt++] = A->a[k];
1010     }
1011     for (; j<B->i[i+1]; j++) {
1012       column_values[cnt++] = B->a[j];
1013     }
1014   }
1015   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1016 
1017   /* store the column values to the file */
1018   if (!rank) {
1019     MPI_Status status;
1020     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1021     for (i=1; i<size; i++) {
1022       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1023       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1024       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1026     }
1027   } else {
1028     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1029     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1030   }
1031   ierr = PetscFree(column_values);CHKERRQ(ierr);
1032   PetscFunctionReturn(0);
1033 }
1034 
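/*
   Illustrative usage sketch (not part of the original source): the writer above
   streams the header (cookie, M, N, total nz), the per-row lengths, the column
   indices, and finally the values through rank 0; callers only see the viewer
   interface. The file name is made up.
*/
#if 0
static PetscErrorCode SaveExample(Mat mat)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(((PetscObject)mat)->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(mat,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif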
1035 #undef __FUNCT__
1036 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1037 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1038 {
1039   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1040   PetscErrorCode    ierr;
1041   PetscMPIInt       rank = aij->rank,size = aij->size;
1042   PetscTruth        isdraw,iascii,isbinary;
1043   PetscViewer       sviewer;
1044   PetscViewerFormat format;
1045 
1046   PetscFunctionBegin;
1047   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1048   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1050   if (iascii) {
1051     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1052     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1053       MatInfo    info;
1054       PetscTruth inodes;
1055 
1056       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1057       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1058       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1059       if (!inodes) {
1060         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1061 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1062       } else {
1063         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1064 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1065       }
1066       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1067       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1068       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1072       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1073       PetscFunctionReturn(0);
1074     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1075       PetscInt   inodecount,inodelimit,*inodes;
1076       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1077       if (inodes) {
1078         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1079       } else {
1080         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1081       }
1082       PetscFunctionReturn(0);
1083     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1084       PetscFunctionReturn(0);
1085     }
1086   } else if (isbinary) {
1087     if (size == 1) {
1088       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1089       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1090     } else {
1091       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1092     }
1093     PetscFunctionReturn(0);
1094   } else if (isdraw) {
1095     PetscDraw  draw;
1096     PetscTruth isnull;
1097     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1098     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1099   }
1100 
1101   if (size == 1) {
1102     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1103     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1104   } else {
1105     /* assemble the entire matrix onto first processor. */
1106     Mat         A;
1107     Mat_SeqAIJ  *Aloc;
1108     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1109     MatScalar   *a;
1110 
1111     if (mat->rmap->N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");
1112 
1113     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1114     if (!rank) {
1115       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1116     } else {
1117       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1118     }
1119     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1120     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1121     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1122     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1123 
1124     /* copy over the A part */
1125     Aloc = (Mat_SeqAIJ*)aij->A->data;
1126     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1127     row = mat->rmap->rstart;
1128     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1129     for (i=0; i<m; i++) {
1130       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1131       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1132     }
1133     aj = Aloc->j;
1134     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1135 
1136     /* copy over the B part */
1137     Aloc = (Mat_SeqAIJ*)aij->B->data;
1138     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1139     row  = mat->rmap->rstart;
1140     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1141     ct   = cols;
1142     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1143     for (i=0; i<m; i++) {
1144       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1145       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1146     }
1147     ierr = PetscFree(ct);CHKERRQ(ierr);
1148     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1149     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1150     /*
1151        Everyone has to make this call to draw the matrix since the graphics waits are
1152        synchronized across all processes that share the PetscDraw object
1153     */
1154     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1155     if (!rank) {
1156       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1157       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1158     }
1159     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1160     ierr = MatDestroy(A);CHKERRQ(ierr);
1161   }
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatView_MPIAIJ"
1167 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1168 {
1169   PetscErrorCode ierr;
1170   PetscTruth     iascii,isdraw,issocket,isbinary;
1171 
1172   PetscFunctionBegin;
1173   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1174   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1175   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1176   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1177   if (iascii || isdraw || isbinary || issocket) {
1178     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1179   } else {
1180     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatRelax_MPIAIJ"
1187 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1188 {
1189   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1190   PetscErrorCode ierr;
1191   Vec            bb1;
1192 
1193   PetscFunctionBegin;
1194   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1195 
1196   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1197     if (flag & SOR_ZERO_INITIAL_GUESS) {
1198       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1199       its--;
1200     }
1201 
1202     while (its--) {
1203       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1204       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1205 
1206       /* update rhs: bb1 = bb - B*x */
1207       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1208       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1209 
1210       /* local sweep */
1211       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1212     }
1213   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1214     if (flag & SOR_ZERO_INITIAL_GUESS) {
1215       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1216       its--;
1217     }
1218     while (its--) {
1219       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1220       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1221 
1222       /* update rhs: bb1 = bb - B*x */
1223       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1224       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1225 
1226       /* local sweep */
1227       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1228     }
1229   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1230     if (flag & SOR_ZERO_INITIAL_GUESS) {
1231       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1232       its--;
1233     }
1234     while (its--) {
1235       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1237 
1238       /* update rhs: bb1 = bb - B*x */
1239       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1240       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1241 
1242       /* local sweep */
1243       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1244     }
1245   } else {
1246     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1247   }
1248 
1249   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
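/*
   Illustrative usage sketch (not part of the original source): each outer
   iteration above refreshes the ghost values, forms the locally corrected
   right-hand side bb1 = bb - B*x, and then runs a sequential sweep on the
   diagonal block only. The usual entry point is the SOR preconditioner
   (petscpc.h) rather than a direct MatRelax() call.
*/
#if 0
static PetscErrorCode LocalSORExample(PC pc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);                             /* ends up applying MatRelax() */
  ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif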
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatPermute_MPIAIJ"
1255 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1256 {
1257   MPI_Comm       comm,pcomm;
1258   PetscInt       first,local_size,nrows;
1259   const PetscInt *rows;
1260   int            ntids;
1261   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1262   PetscErrorCode ierr;
1263 
1264   PetscFunctionBegin;
1265   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1266   /* make a collective version of 'rowp' */
1267   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1268   if (pcomm==comm) {
1269     crowp = rowp;
1270   } else {
1271     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1272     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1273     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1274     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1275   }
1276   /* collect the global row permutation and invert it */
1277   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1278   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1279   if (pcomm!=comm) {
1280     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1281   }
1282   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1283   /* get the local target indices */
1284   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1285   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1286   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1287   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1288   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1289   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1290   /* the column permutation is so much easier;
1291      make a local version of 'colp' and invert it */
1292   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1293   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1294   if (ntids==1) {
1295     lcolp = colp;
1296   } else {
1297     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1298     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1299     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1300   }
1301   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1302   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1303   if (ntids>1) {
1304     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1305     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1306   }
1307   /* now we just get the submatrix */
1308   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1309   /* clean up */
1310   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1311   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 #undef __FUNCT__
1316 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1317 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1318 {
1319   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1320   Mat            A = mat->A,B = mat->B;
1321   PetscErrorCode ierr;
1322   PetscReal      isend[5],irecv[5];
1323 
1324   PetscFunctionBegin;
1325   info->block_size     = 1.0;
1326   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1327   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1328   isend[3] = info->memory;  isend[4] = info->mallocs;
1329   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1330   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1331   isend[3] += info->memory;  isend[4] += info->mallocs;
1332   if (flag == MAT_LOCAL) {
1333     info->nz_used      = isend[0];
1334     info->nz_allocated = isend[1];
1335     info->nz_unneeded  = isend[2];
1336     info->memory       = isend[3];
1337     info->mallocs      = isend[4];
1338   } else if (flag == MAT_GLOBAL_MAX) {
1339     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1340     info->nz_used      = irecv[0];
1341     info->nz_allocated = irecv[1];
1342     info->nz_unneeded  = irecv[2];
1343     info->memory       = irecv[3];
1344     info->mallocs      = irecv[4];
1345   } else if (flag == MAT_GLOBAL_SUM) {
1346     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1347     info->nz_used      = irecv[0];
1348     info->nz_allocated = irecv[1];
1349     info->nz_unneeded  = irecv[2];
1350     info->memory       = irecv[3];
1351     info->mallocs      = irecv[4];
1352   }
1353   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1354   info->fill_ratio_needed = 0;
1355   info->factor_mallocs    = 0;
1356 
1357   PetscFunctionReturn(0);
1358 }
1359 
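/*
   Example (editorial sketch, not part of the library): caller-side use of the
   public MatGetInfo() interface, which dispatches here for MPIAIJ matrices;
   'A' is assumed to be an assembled parallel matrix.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/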
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatSetOption_MPIAIJ"
1362 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1363 {
1364   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1365   PetscErrorCode ierr;
1366 
1367   PetscFunctionBegin;
1368   switch (op) {
1369   case MAT_NEW_NONZERO_LOCATIONS:
1370   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1371   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1372   case MAT_KEEP_ZEROED_ROWS:
1373   case MAT_NEW_NONZERO_LOCATION_ERR:
1374   case MAT_USE_INODES:
1375   case MAT_IGNORE_ZERO_ENTRIES:
1376     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1377     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1378     break;
1379   case MAT_ROW_ORIENTED:
1380     a->roworiented = flg;
1381     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1382     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1383     break;
1384   case MAT_NEW_DIAGONALS:
1385     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1386     break;
1387   case MAT_IGNORE_OFF_PROC_ENTRIES:
1388     a->donotstash = PETSC_TRUE;
1389     break;
1390   case MAT_SYMMETRIC:
1391     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1392     break;
1393   case MAT_STRUCTURALLY_SYMMETRIC:
1394   case MAT_HERMITIAN:
1395   case MAT_SYMMETRY_ETERNAL:
1396     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1397     break;
1398   default:
1399     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatGetRow_MPIAIJ"
1406 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1407 {
1408   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1409   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1410   PetscErrorCode ierr;
1411   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1412   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1413   PetscInt       *cmap,*idx_p;
1414 
1415   PetscFunctionBegin;
1416   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1417   mat->getrowactive = PETSC_TRUE;
1418 
1419   if (!mat->rowvalues && (idx || v)) {
1420     /*
1421         allocate enough space to hold information from the longest row.
1422     */
1423     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1424     PetscInt     max = 1,tmp;
1425     for (i=0; i<matin->rmap->n; i++) {
1426       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1427       if (max < tmp) { max = tmp; }
1428     }
1429     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1430     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1431   }
1432 
1433   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1434   lrow = row - rstart;
1435 
1436   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1437   if (!v)   {pvA = 0; pvB = 0;}
1438   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1439   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1440   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1441   nztot = nzA + nzB;
1442 
1443   cmap  = mat->garray;
1444   if (v  || idx) {
1445     if (nztot) {
1446       /* Sort by increasing column numbers, assuming A and B already sorted */
1447       PetscInt imark = -1;
1448       if (v) {
1449         *v = v_p = mat->rowvalues;
1450         for (i=0; i<nzB; i++) {
1451           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1452           else break;
1453         }
1454         imark = i;
1455         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1456         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1457       }
1458       if (idx) {
1459         *idx = idx_p = mat->rowindices;
1460         if (imark > -1) {
1461           for (i=0; i<imark; i++) {
1462             idx_p[i] = cmap[cworkB[i]];
1463           }
1464         } else {
1465           for (i=0; i<nzB; i++) {
1466             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1467             else break;
1468           }
1469           imark = i;
1470         }
1471         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1472         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1473       }
1474     } else {
1475       if (idx) *idx = 0;
1476       if (v)   *v   = 0;
1477     }
1478   }
1479   *nz = nztot;
1480   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1481   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1487 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1488 {
1489   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1490 
1491   PetscFunctionBegin;
1492   if (!aij->getrowactive) {
1493     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1494   }
1495   aij->getrowactive = PETSC_FALSE;
1496   PetscFunctionReturn(0);
1497 }
1498 
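/*
   Example (editorial sketch): iterating over the locally owned rows with the
   public MatGetRow()/MatRestoreRow() pair; as enforced above, only local rows
   may be queried and at most one row may be active at a time.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* ... inspect ncols, cols[], vals[] (global column indices) ... */
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/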
1499 #undef __FUNCT__
1500 #define __FUNCT__ "MatNorm_MPIAIJ"
1501 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1502 {
1503   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1504   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1505   PetscErrorCode ierr;
1506   PetscInt       i,j,cstart = mat->cmap->rstart;
1507   PetscReal      sum = 0.0;
1508   MatScalar      *v;
1509 
1510   PetscFunctionBegin;
1511   if (aij->size == 1) {
1512     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1513   } else {
1514     if (type == NORM_FROBENIUS) {
1515       v = amat->a;
1516       for (i=0; i<amat->nz; i++) {
1517 #if defined(PETSC_USE_COMPLEX)
1518         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1519 #else
1520         sum += (*v)*(*v); v++;
1521 #endif
1522       }
1523       v = bmat->a;
1524       for (i=0; i<bmat->nz; i++) {
1525 #if defined(PETSC_USE_COMPLEX)
1526         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1527 #else
1528         sum += (*v)*(*v); v++;
1529 #endif
1530       }
1531       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1532       *norm = sqrt(*norm);
1533     } else if (type == NORM_1) { /* max column norm */
1534       PetscReal *tmp,*tmp2;
1535       PetscInt  *jj,*garray = aij->garray;
1536       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1537       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1538       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1539       *norm = 0.0;
1540       v = amat->a; jj = amat->j;
1541       for (j=0; j<amat->nz; j++) {
1542         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1543       }
1544       v = bmat->a; jj = bmat->j;
1545       for (j=0; j<bmat->nz; j++) {
1546         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1547       }
1548       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1549       for (j=0; j<mat->cmap->N; j++) {
1550         if (tmp2[j] > *norm) *norm = tmp2[j];
1551       }
1552       ierr = PetscFree(tmp);CHKERRQ(ierr);
1553       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1554     } else if (type == NORM_INFINITY) { /* max row norm */
1555       PetscReal ntemp = 0.0;
1556       for (j=0; j<aij->A->rmap->n; j++) {
1557         v = amat->a + amat->i[j];
1558         sum = 0.0;
1559         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1560           sum += PetscAbsScalar(*v); v++;
1561         }
1562         v = bmat->a + bmat->i[j];
1563         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1564           sum += PetscAbsScalar(*v); v++;
1565         }
1566         if (sum > ntemp) ntemp = sum;
1567       }
1568       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1569     } else {
1570       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1571     }
1572   }
1573   PetscFunctionReturn(0);
1574 }
1575 
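/*
   Example (editorial sketch): the three norms supported above through the
   public interface; NORM_2 is rejected by this implementation.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);        /* max column sum */
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr); /* max row sum */
*/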
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatTranspose_MPIAIJ"
1578 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1579 {
1580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1581   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1582   PetscErrorCode ierr;
1583   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1584   PetscInt       cstart=A->cmap->rstart,ncol;
1585   Mat            B;
1586   MatScalar      *array;
1587 
1588   PetscFunctionBegin;
1589   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1590 
1591   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1592   ai = Aloc->i; aj = Aloc->j;
1593   bi = Bloc->i; bj = Bloc->j;
1594   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1595     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1596     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1597     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1598     for (i=0; i<ai[ma]; i++){
1599       d_nnz[aj[i]] ++;
1600       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1601     }
1602 
1603     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1604     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1605     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1606     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1607     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1608   } else {
1609     B = *matout;
1610   }
1611 
1612   /* copy over the A part */
1613   array = Aloc->a;
1614   row = A->rmap->rstart;
1615   for (i=0; i<ma; i++) {
1616     ncol = ai[i+1]-ai[i];
1617     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1618     row++; array += ncol; aj += ncol;
1619   }
1620   aj = Aloc->j;
1621   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1622 
1623   /* copy over the B part */
1624   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1625   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1626   array = Bloc->a;
1627   row = A->rmap->rstart;
1628   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1629   cols_tmp = cols;
1630   for (i=0; i<mb; i++) {
1631     ncol = bi[i+1]-bi[i];
1632     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1633     row++; array += ncol; cols_tmp += ncol;
1634   }
1635   ierr = PetscFree(cols);CHKERRQ(ierr);
1636 
1637   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1638   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1639   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1640     *matout = B;
1641   } else {
1642     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1643   }
1644   PetscFunctionReturn(0);
1645 }
1646 
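/*
   Example (editorial sketch): out-of-place and in-place transposition through
   the public interface; as checked above, in-place reuse requires a square matrix.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* new matrix */
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* in-place, square only */
*/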
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1649 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1650 {
1651   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1652   Mat            a = aij->A,b = aij->B;
1653   PetscErrorCode ierr;
1654   PetscInt       s1,s2,s3;
1655 
1656   PetscFunctionBegin;
1657   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1658   if (rr) {
1659     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1660     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1661     /* Overlap communication with computation. */
1662     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663   }
1664   if (ll) {
1665     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1666     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1667     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1668   }
1669   /* scale the diagonal block */
1670   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1671 
1672   if (rr) {
1673     /* Do a scatter end and then right scale the off-diagonal block */
1674     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1675     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1676   }
1677 
1678   PetscFunctionReturn(0);
1679 }
1680 
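/*
   Example (editorial sketch): row and column scaling, A <- diag(ll) A diag(rr);
   either vector may be PETSC_NULL to scale on one side only, matching the
   ll/rr handling above.

     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,PETSC_NULL,rr);CHKERRQ(ierr); /* columns only */
*/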
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1683 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1684 {
1685   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1686   PetscErrorCode ierr;
1687 
1688   PetscFunctionBegin;
1689   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1690   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1691   PetscFunctionReturn(0);
1692 }
1693 #undef __FUNCT__
1694 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1695 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1696 {
1697   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1698   PetscErrorCode ierr;
1699 
1700   PetscFunctionBegin;
1701   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatEqual_MPIAIJ"
1707 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1708 {
1709   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1710   Mat            a,b,c,d;
1711   PetscTruth     flg;
1712   PetscErrorCode ierr;
1713 
1714   PetscFunctionBegin;
1715   a = matA->A; b = matA->B;
1716   c = matB->A; d = matB->B;
1717 
1718   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1719   if (flg) {
1720     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1721   }
1722   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatCopy_MPIAIJ"
1728 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1729 {
1730   PetscErrorCode ierr;
1731   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1732   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1733 
1734   PetscFunctionBegin;
1735   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1736   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1737     /* Because of the column compression in the off-process part of the matrix a->B,
1738        the number of columns in a->B and b->B may differ, hence we cannot call
1739        MatCopy() directly on the two parts. If need be, a copy more efficient than
1740        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1741        and then copying the submatrices. */
1742     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1743   } else {
1744     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1745     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1746   }
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1752 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1753 {
1754   PetscErrorCode ierr;
1755 
1756   PetscFunctionBegin;
1757   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #include "petscblaslapack.h"
1762 #undef __FUNCT__
1763 #define __FUNCT__ "MatAXPY_MPIAIJ"
1764 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1765 {
1766   PetscErrorCode ierr;
1767   PetscInt       i;
1768   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1769   PetscBLASInt   bnz,one=1;
1770   Mat_SeqAIJ     *x,*y;
1771 
1772   PetscFunctionBegin;
1773   if (str == SAME_NONZERO_PATTERN) {
1774     PetscScalar alpha = a;
1775     x = (Mat_SeqAIJ *)xx->A->data;
1776     y = (Mat_SeqAIJ *)yy->A->data;
1777     bnz = PetscBLASIntCast(x->nz);
1778     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1779     x = (Mat_SeqAIJ *)xx->B->data;
1780     y = (Mat_SeqAIJ *)yy->B->data;
1781     bnz = PetscBLASIntCast(x->nz);
1782     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1783   } else if (str == SUBSET_NONZERO_PATTERN) {
1784     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1785 
1786     x = (Mat_SeqAIJ *)xx->B->data;
1787     y = (Mat_SeqAIJ *)yy->B->data;
1788     if (y->xtoy && y->XtoY != xx->B) {
1789       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1790       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1791     }
1792     if (!y->xtoy) { /* get xtoy */
1793       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1794       y->XtoY = xx->B;
1795       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1796     }
1797     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1798   } else {
1799     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1800   }
1801   PetscFunctionReturn(0);
1802 }
1803 
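/*
   Example (editorial sketch): Y <- a*X + Y through the public interface; the
   MatStructure flag selects the path above (SAME_NONZERO_PATTERN uses one BLAS
   axpy per block, SUBSET_NONZERO_PATTERN builds and caches an x-to-y index map,
   anything else falls back to MatAXPY_Basic()).

     PetscScalar a = 2.0;
     ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/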
1804 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1805 
1806 #undef __FUNCT__
1807 #define __FUNCT__ "MatConjugate_MPIAIJ"
1808 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1809 {
1810 #if defined(PETSC_USE_COMPLEX)
1811   PetscErrorCode ierr;
1812   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1813 
1814   PetscFunctionBegin;
1815   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1816   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1817 #else
1818   PetscFunctionBegin;
1819 #endif
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatRealPart_MPIAIJ"
1825 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1826 {
1827   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1828   PetscErrorCode ierr;
1829 
1830   PetscFunctionBegin;
1831   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1832   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1833   PetscFunctionReturn(0);
1834 }
1835 
1836 #undef __FUNCT__
1837 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1838 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1839 {
1840   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1841   PetscErrorCode ierr;
1842 
1843   PetscFunctionBegin;
1844   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1845   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1846   PetscFunctionReturn(0);
1847 }
1848 
1849 #ifdef PETSC_HAVE_PBGL
1850 
1851 #include <boost/parallel/mpi/bsp_process_group.hpp>
1852 #include <boost/graph/distributed/ilu_default_graph.hpp>
1853 #include <boost/graph/distributed/ilu_0_block.hpp>
1854 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1855 #include <boost/graph/distributed/petsc/interface.hpp>
1856 #include <boost/multi_array.hpp>
1857 #include <boost/parallel/distributed_property_map.hpp>
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1861 /*
1862   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1863 */
1864 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1865 {
1866   namespace petsc = boost::distributed::petsc;
1867 
1868   namespace graph_dist = boost::graph::distributed;
1869   using boost::graph::distributed::ilu_default::process_group_type;
1870   using boost::graph::ilu_permuted;
1871 
1872   PetscTruth      row_identity, col_identity;
1873   PetscContainer  c;
1874   PetscInt        m, n, M, N;
1875   PetscErrorCode  ierr;
1876 
1877   PetscFunctionBegin;
1878   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1879   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1880   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1881   if (!row_identity || !col_identity) {
1882     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1883   }
1884 
1885   process_group_type pg;
1886   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1887   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1888   lgraph_type&   level_graph = *lgraph_p;
1889   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1890 
1891   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1892   ilu_permuted(level_graph);
1893 
1894   /* put together the new matrix */
1895   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1896   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1897   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1898   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1899   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1900   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1901   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1902 
1903   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1904   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1905   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 #undef __FUNCT__
1910 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1911 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1912 {
1913   PetscFunctionBegin;
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatSolve_MPIAIJ"
1919 /*
1920   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1921 */
1922 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1923 {
1924   namespace graph_dist = boost::graph::distributed;
1925 
1926   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1927   lgraph_type*   lgraph_p;
1928   PetscContainer c;
1929   PetscErrorCode ierr;
1930 
1931   PetscFunctionBegin;
1932   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1933   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1934   ierr = VecCopy(b, x); CHKERRQ(ierr);
1935 
1936   PetscScalar* array_x;
1937   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1938   PetscInt sx;
1939   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1940 
1941   PetscScalar* array_b;
1942   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1943   PetscInt sb;
1944   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1945 
1946   lgraph_type&   level_graph = *lgraph_p;
1947   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1948 
1949   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1950   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1951                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1952 
1953   typedef boost::iterator_property_map<array_ref_type::iterator,
1954                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1955   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1956                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1957 
1958   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1959 
1960   PetscFunctionReturn(0);
1961 }
1962 #endif
1963 
1964 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1965   PetscInt       nzlocal,nsends,nrecvs;
1966   PetscMPIInt    *send_rank;
1967   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1968   PetscScalar    *sbuf_a,**rbuf_a;
1969   PetscErrorCode (*MatDestroy)(Mat);
1970 } Mat_Redundant;
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1974 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1975 {
1976   PetscErrorCode       ierr;
1977   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1978   PetscInt             i;
1979 
1980   PetscFunctionBegin;
1981   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1982   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1983   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1984   for (i=0; i<redund->nrecvs; i++){
1985     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1986     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1987   }
1988   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1989   ierr = PetscFree(redund);CHKERRQ(ierr);
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 #undef __FUNCT__
1994 #define __FUNCT__ "MatDestroy_MatRedundant"
1995 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1996 {
1997   PetscErrorCode  ierr;
1998   PetscContainer  container;
1999   Mat_Redundant   *redund=PETSC_NULL;
2000 
2001   PetscFunctionBegin;
2002   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2003   if (container) {
2004     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2005   } else {
2006     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2007   }
2008   A->ops->destroy = redund->MatDestroy;
2009   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2010   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2011   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 #undef __FUNCT__
2016 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2017 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2018 {
2019   PetscMPIInt    rank,size;
2020   MPI_Comm       comm=((PetscObject)mat)->comm;
2021   PetscErrorCode ierr;
2022   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2023   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2024   PetscInt       *rowrange=mat->rmap->range;
2025   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2026   Mat            A=aij->A,B=aij->B,C=*matredundant;
2027   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2028   PetscScalar    *sbuf_a;
2029   PetscInt       nzlocal=a->nz+b->nz;
2030   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2031   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2032   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2033   MatScalar      *aworkA,*aworkB;
2034   PetscScalar    *vals;
2035   PetscMPIInt    tag1,tag2,tag3,imdex;
2036   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2037                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2038   MPI_Status     recv_status,*send_status;
2039   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2040   PetscInt       **rbuf_j=PETSC_NULL;
2041   PetscScalar    **rbuf_a=PETSC_NULL;
2042   Mat_Redundant  *redund=PETSC_NULL;
2043   PetscContainer container;
2044 
2045   PetscFunctionBegin;
2046   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2047   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2048 
2049   if (reuse == MAT_REUSE_MATRIX) {
2050     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2051     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2052     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2053     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2054     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2055     if (container) {
2056       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2057     } else {
2058       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2059     }
2060     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2061 
2062     nsends    = redund->nsends;
2063     nrecvs    = redund->nrecvs;
2064     send_rank = redund->send_rank; recv_rank = send_rank + size;
2065     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2066     sbuf_j    = redund->sbuf_j;
2067     sbuf_a    = redund->sbuf_a;
2068     rbuf_j    = redund->rbuf_j;
2069     rbuf_a    = redund->rbuf_a;
2070   }
2071 
2072   if (reuse == MAT_INITIAL_MATRIX){
2073     PetscMPIInt  subrank,subsize;
2074     PetscInt     nleftover,np_subcomm;
2075     /* get the destination processors' id send_rank, nsends and nrecvs */
2076     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2077     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2078     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2079     recv_rank = send_rank + size;
2080     np_subcomm = size/nsubcomm;
2081     nleftover  = size - nsubcomm*np_subcomm;
2082     nsends = 0; nrecvs = 0;
2083     for (i=0; i<size; i++){ /* i = rank */
2084       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2085         send_rank[nsends] = i; nsends++;
2086         recv_rank[nrecvs++] = i;
2087       }
2088     }
2089     if (rank >= size - nleftover){/* this proc is a leftover processor */
2090       i = size-nleftover-1;
2091       j = 0;
2092       while (j < nsubcomm - nleftover){
2093         send_rank[nsends++] = i;
2094         i--; j++;
2095       }
2096     }
2097 
2098     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2099       for (i=0; i<nleftover; i++){
2100         recv_rank[nrecvs++] = size-nleftover+i;
2101       }
2102     }
2103 
2104     /* allocate sbuf_j, sbuf_a */
2105     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2106     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2107     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2108   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2109 
2110   /* copy mat's local entries into the buffers */
2111   if (reuse == MAT_INITIAL_MATRIX){
2112     rownz_max = 0;
2113     rptr = sbuf_j;
2114     cols = sbuf_j + rend-rstart + 1;
2115     vals = sbuf_a;
2116     rptr[0] = 0;
2117     for (i=0; i<rend-rstart; i++){
2118       row = i + rstart;
2119       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2120       ncols  = nzA + nzB;
2121       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2122       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2123       /* load the column indices for this row into cols */
2124       lwrite = 0;
2125       for (l=0; l<nzB; l++) {
2126         if ((ctmp = bmap[cworkB[l]]) < cstart){
2127           vals[lwrite]   = aworkB[l];
2128           cols[lwrite++] = ctmp;
2129         }
2130       }
2131       for (l=0; l<nzA; l++){
2132         vals[lwrite]   = aworkA[l];
2133         cols[lwrite++] = cstart + cworkA[l];
2134       }
2135       for (l=0; l<nzB; l++) {
2136         if ((ctmp = bmap[cworkB[l]]) >= cend){
2137           vals[lwrite]   = aworkB[l];
2138           cols[lwrite++] = ctmp;
2139         }
2140       }
2141       vals += ncols;
2142       cols += ncols;
2143       rptr[i+1] = rptr[i] + ncols;
2144       if (rownz_max < ncols) rownz_max = ncols;
2145     }
2146     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2147   } else { /* only copy matrix values into sbuf_a */
2148     rptr = sbuf_j;
2149     vals = sbuf_a;
2150     rptr[0] = 0;
2151     for (i=0; i<rend-rstart; i++){
2152       row = i + rstart;
2153       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2154       ncols  = nzA + nzB;
2155       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2156       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2157       lwrite = 0;
2158       for (l=0; l<nzB; l++) {
2159         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2160       }
2161       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2162       for (l=0; l<nzB; l++) {
2163         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2164       }
2165       vals += ncols;
2166       rptr[i+1] = rptr[i] + ncols;
2167     }
2168   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2169 
2170   /* send nzlocal to others, and recv others' nzlocal */
2171   /*--------------------------------------------------*/
2172   if (reuse == MAT_INITIAL_MATRIX){
2173     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2174     s_waits2 = s_waits3 + nsends;
2175     s_waits1 = s_waits2 + nsends;
2176     r_waits1 = s_waits1 + nsends;
2177     r_waits2 = r_waits1 + nrecvs;
2178     r_waits3 = r_waits2 + nrecvs;
2179   } else {
2180     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2181     r_waits3 = s_waits3 + nsends;
2182   }
2183 
2184   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2185   if (reuse == MAT_INITIAL_MATRIX){
2186     /* get new tags to keep the communication clean */
2187     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2188     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2189     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2190     rbuf_nz = sbuf_nz + nsends;
2191 
2192     /* post receives of other's nzlocal */
2193     for (i=0; i<nrecvs; i++){
2194       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2195     }
2196     /* send nzlocal to others */
2197     for (i=0; i<nsends; i++){
2198       sbuf_nz[i] = nzlocal;
2199       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2200     }
2201     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2202     count = nrecvs;
2203     while (count) {
2204       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2205       recv_rank[imdex] = recv_status.MPI_SOURCE;
2206       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2207       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2208 
2209       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2210       rbuf_nz[imdex] += i + 2;
2211       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2212       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2213       count--;
2214     }
2215     /* wait on sends of nzlocal */
2216     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2217     /* send mat->i,j to others, and recv from others */
2218     /*------------------------------------------------*/
2219     for (i=0; i<nsends; i++){
2220       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2221       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2222     }
2223     /* wait on receives of mat->i,j */
2224     /*------------------------------*/
2225     count = nrecvs;
2226     while (count) {
2227       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2228       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2229       count--;
2230     }
2231     /* wait on sends of mat->i,j */
2232     /*---------------------------*/
2233     if (nsends) {
2234       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2235     }
2236   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2237 
2238   /* post receives, send and receive mat->a */
2239   /*----------------------------------------*/
2240   for (imdex=0; imdex<nrecvs; imdex++) {
2241     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2242   }
2243   for (i=0; i<nsends; i++){
2244     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2245   }
2246   count = nrecvs;
2247   while (count) {
2248     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2249     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2250     count--;
2251   }
2252   if (nsends) {
2253     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2254   }
2255 
2256   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2257 
2258   /* create redundant matrix */
2259   /*-------------------------*/
2260   if (reuse == MAT_INITIAL_MATRIX){
2261     /* compute rownz_max for preallocation */
2262     for (imdex=0; imdex<nrecvs; imdex++){
2263       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2264       rptr = rbuf_j[imdex];
2265       for (i=0; i<j; i++){
2266         ncols = rptr[i+1] - rptr[i];
2267         if (rownz_max < ncols) rownz_max = ncols;
2268       }
2269     }
2270 
2271     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2272     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2273     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2274     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2275     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2276   } else {
2277     C = *matredundant;
2278   }
2279 
2280   /* insert local matrix entries */
2281   rptr = sbuf_j;
2282   cols = sbuf_j + rend-rstart + 1;
2283   vals = sbuf_a;
2284   for (i=0; i<rend-rstart; i++){
2285     row   = i + rstart;
2286     ncols = rptr[i+1] - rptr[i];
2287     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2288     vals += ncols;
2289     cols += ncols;
2290   }
2291   /* insert received matrix entries */
2292   for (imdex=0; imdex<nrecvs; imdex++){
2293     rstart = rowrange[recv_rank[imdex]];
2294     rend   = rowrange[recv_rank[imdex]+1];
2295     rptr = rbuf_j[imdex];
2296     cols = rbuf_j[imdex] + rend-rstart + 1;
2297     vals = rbuf_a[imdex];
2298     for (i=0; i<rend-rstart; i++){
2299       row   = i + rstart;
2300       ncols = rptr[i+1] - rptr[i];
2301       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2302       vals += ncols;
2303       cols += ncols;
2304     }
2305   }
2306   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2307   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2308   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2309   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ4(PETSC_ERR_ARG_INCOMP,"redundant mat size %d x %d != input mat size %d x %d",M,N,mat->rmap->N,mat->cmap->N);
2310   if (reuse == MAT_INITIAL_MATRIX){
2311     PetscContainer container;
2312     *matredundant = C;
2313     /* create a supporting struct and attach it to C for reuse */
2314     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2315     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2316     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2317     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2318     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2319 
2320     redund->nzlocal = nzlocal;
2321     redund->nsends  = nsends;
2322     redund->nrecvs  = nrecvs;
2323     redund->send_rank = send_rank;
2324     redund->sbuf_nz = sbuf_nz;
2325     redund->sbuf_j  = sbuf_j;
2326     redund->sbuf_a  = sbuf_a;
2327     redund->rbuf_j  = rbuf_j;
2328     redund->rbuf_a  = rbuf_a;
2329 
2330     redund->MatDestroy = C->ops->destroy;
2331     C->ops->destroy    = MatDestroy_MatRedundant;
2332   }
2333   PetscFunctionReturn(0);
2334 }
2335 
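/*
   Example (editorial sketch): building nsubcomm redundant copies of a parallel
   matrix, one per subcommunicator, then refreshing the values on a later call;
   subcomm and mlocal_sub are assumed to have been set up by the caller (e.g.
   by PCREDUNDANT, which drives this routine).

     Mat Cred = PETSC_NULL;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
     /* ... values of mat change, nonzero pattern unchanged ... */
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);
*/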
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2338 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2339 {
2340   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2341   PetscErrorCode ierr;
2342   PetscInt       i,*idxb = 0;
2343   PetscScalar    *va,*vb;
2344   Vec            vtmp;
2345 
2346   PetscFunctionBegin;
2347   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2348   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2349   if (idx) {
2350     for (i=0; i<A->rmap->n; i++) {
2351       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2352     }
2353   }
2354 
2355   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2356   if (idx) {
2357     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2358   }
2359   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2360   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2361 
2362   for (i=0; i<A->rmap->n; i++){
2363     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2364       va[i] = vb[i];
2365       if (idx) idx[i] = a->garray[idxb[i]];
2366     }
2367   }
2368 
2369   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2370   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2371   if (idxb) {
2372     ierr = PetscFree(idxb);CHKERRQ(ierr);
2373   }
2374   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 
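/*
   Example (editorial sketch): largest entry in absolute value of each locally
   owned row, together with the global column index of that entry; idx may be
   PETSC_NULL if only the values are wanted.

     Vec      v;
     PetscInt m,*idx;
     ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);   /* vector with row layout */
     ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
*/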
2378 #undef __FUNCT__
2379 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2380 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2381 {
2382   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2383   PetscErrorCode ierr;
2384   PetscInt       i,*idxb = 0;
2385   PetscScalar    *va,*vb;
2386   Vec            vtmp;
2387 
2388   PetscFunctionBegin;
2389   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2390   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2391   if (idx) {
2392     for (i=0; i<A->rmap->n; i++) {
2393       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2394     }
2395   }
2396 
2397   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2398   if (idx) {
2399     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2400   }
2401   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2402   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2403 
2404   for (i=0; i<A->rmap->n; i++){
2405     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2406       va[i] = vb[i];
2407       if (idx) idx[i] = a->garray[idxb[i]];
2408     }
2409   }
2410 
2411   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2412   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2413   if (idxb) {
2414     ierr = PetscFree(idxb);CHKERRQ(ierr);
2415   }
2416   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2422 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2423 {
2424   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2425   PetscInt       n      = A->rmap->n;
2426   PetscInt       cstart = A->cmap->rstart;
2427   PetscInt      *cmap   = mat->garray;
2428   PetscInt      *diagIdx, *offdiagIdx;
2429   Vec            diagV, offdiagV;
2430   PetscScalar   *a, *diagA, *offdiagA;
2431   PetscInt       r;
2432   PetscErrorCode ierr;
2433 
2434   PetscFunctionBegin;
2435   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2436   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2437   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2438   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2439   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2440   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2441   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2442   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2443   for(r = 0; r < n; ++r) {
2444     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2445       a[r]   = diagA[r];
2446       idx[r] = cstart + diagIdx[r];
2447     } else {
2448       a[r]   = offdiagA[r];
2449       idx[r] = cmap[offdiagIdx[r]];
2450     }
2451   }
2452   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2453   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2454   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2455   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2456   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2457   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 #undef __FUNCT__
2462 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2463 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2464 {
2465   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2466   PetscInt       n      = A->rmap->n;
2467   PetscInt       cstart = A->cmap->rstart;
2468   PetscInt      *cmap   = mat->garray;
2469   PetscInt      *diagIdx, *offdiagIdx;
2470   Vec            diagV, offdiagV;
2471   PetscScalar   *a, *diagA, *offdiagA;
2472   PetscInt       r;
2473   PetscErrorCode ierr;
2474 
2475   PetscFunctionBegin;
2476   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2477   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2478   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2479   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2480   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2481   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2482   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2483   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2484   for(r = 0; r < n; ++r) {
2485     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2486       a[r]   = diagA[r];
2487       idx[r] = cstart + diagIdx[r];
2488     } else {
2489       a[r]   = offdiagA[r];
2490       idx[r] = cmap[offdiagIdx[r]];
2491     }
2492   }
2493   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2494   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2495   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2496   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2497   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2498   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 #undef __FUNCT__
2503 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2504 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2505 {
2506   PetscErrorCode ierr;
2507 
2508   PetscFunctionBegin;
2509   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2510   PetscFunctionReturn(0);
2511 }
2512 
2513 /* -------------------------------------------------------------------*/
2514 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2515        MatGetRow_MPIAIJ,
2516        MatRestoreRow_MPIAIJ,
2517        MatMult_MPIAIJ,
2518 /* 4*/ MatMultAdd_MPIAIJ,
2519        MatMultTranspose_MPIAIJ,
2520        MatMultTransposeAdd_MPIAIJ,
2521 #ifdef PETSC_HAVE_PBGL
2522        MatSolve_MPIAIJ,
2523 #else
2524        0,
2525 #endif
2526        0,
2527        0,
2528 /*10*/ 0,
2529        0,
2530        0,
2531        MatRelax_MPIAIJ,
2532        MatTranspose_MPIAIJ,
2533 /*15*/ MatGetInfo_MPIAIJ,
2534        MatEqual_MPIAIJ,
2535        MatGetDiagonal_MPIAIJ,
2536        MatDiagonalScale_MPIAIJ,
2537        MatNorm_MPIAIJ,
2538 /*20*/ MatAssemblyBegin_MPIAIJ,
2539        MatAssemblyEnd_MPIAIJ,
2540        0,
2541        MatSetOption_MPIAIJ,
2542        MatZeroEntries_MPIAIJ,
2543 /*25*/ MatZeroRows_MPIAIJ,
2544        0,
2545 #ifdef PETSC_HAVE_PBGL
2546        0,
2547 #else
2548        0,
2549 #endif
2550        0,
2551        0,
2552 /*30*/ MatSetUpPreallocation_MPIAIJ,
2553 #ifdef PETSC_HAVE_PBGL
2554        0,
2555 #else
2556        0,
2557 #endif
2558        0,
2559        0,
2560        0,
2561 /*35*/ MatDuplicate_MPIAIJ,
2562        0,
2563        0,
2564        0,
2565        0,
2566 /*40*/ MatAXPY_MPIAIJ,
2567        MatGetSubMatrices_MPIAIJ,
2568        MatIncreaseOverlap_MPIAIJ,
2569        MatGetValues_MPIAIJ,
2570        MatCopy_MPIAIJ,
2571 /*45*/ MatGetRowMax_MPIAIJ,
2572        MatScale_MPIAIJ,
2573        0,
2574        0,
2575        0,
2576 /*50*/ MatSetBlockSize_MPIAIJ,
2577        0,
2578        0,
2579        0,
2580        0,
2581 /*55*/ MatFDColoringCreate_MPIAIJ,
2582        0,
2583        MatSetUnfactored_MPIAIJ,
2584        MatPermute_MPIAIJ,
2585        0,
2586 /*60*/ MatGetSubMatrix_MPIAIJ,
2587        MatDestroy_MPIAIJ,
2588        MatView_MPIAIJ,
2589        0,
2590        0,
2591 /*65*/ 0,
2592        0,
2593        0,
2594        0,
2595        0,
2596 /*70*/ MatGetRowMaxAbs_MPIAIJ,
2597        MatGetRowMinAbs_MPIAIJ,
2598        0,
2599        MatSetColoring_MPIAIJ,
2600 #if defined(PETSC_HAVE_ADIC)
2601        MatSetValuesAdic_MPIAIJ,
2602 #else
2603        0,
2604 #endif
2605        MatSetValuesAdifor_MPIAIJ,
2606 /*75*/ 0,
2607        0,
2608        0,
2609        0,
2610        0,
2611 /*80*/ 0,
2612        0,
2613        0,
2614 /*84*/ MatLoad_MPIAIJ,
2615        0,
2616        0,
2617        0,
2618        0,
2619        0,
2620 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2621        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2622        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2623        MatPtAP_Basic,
2624        MatPtAPSymbolic_MPIAIJ,
2625 /*95*/ MatPtAPNumeric_MPIAIJ,
2626        0,
2627        0,
2628        0,
2629        0,
2630 /*100*/0,
2631        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2632        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2633        MatConjugate_MPIAIJ,
2634        0,
2635 /*105*/MatSetValuesRow_MPIAIJ,
2636        MatRealPart_MPIAIJ,
2637        MatImaginaryPart_MPIAIJ,
2638        0,
2639        0,
2640 /*110*/0,
2641        MatGetRedundantMatrix_MPIAIJ,
2642        MatGetRowMin_MPIAIJ,
2643        0,
2644        0,
2645 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2646 
2647 /* ----------------------------------------------------------------------------------------*/
2648 
2649 EXTERN_C_BEGIN
2650 #undef __FUNCT__
2651 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2652 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2653 {
2654   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2655   PetscErrorCode ierr;
2656 
2657   PetscFunctionBegin;
2658   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2659   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2660   PetscFunctionReturn(0);
2661 }
2662 EXTERN_C_END
2663 
2664 EXTERN_C_BEGIN
2665 #undef __FUNCT__
2666 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2667 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2668 {
2669   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2670   PetscErrorCode ierr;
2671 
2672   PetscFunctionBegin;
2673   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2674   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2675   PetscFunctionReturn(0);
2676 }
2677 EXTERN_C_END
2678 
2679 #include "petscpc.h"
2680 EXTERN_C_BEGIN
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2683 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2684 {
2685   Mat_MPIAIJ     *b;
2686   PetscErrorCode ierr;
2687   PetscInt       i;
2688 
2689   PetscFunctionBegin;
2690   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2691   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2692   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2693   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2694 
2695   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2696   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2697   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2698   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2699   if (d_nnz) {
2700     for (i=0; i<B->rmap->n; i++) {
2701       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2702     }
2703   }
2704   if (o_nnz) {
2705     for (i=0; i<B->rmap->n; i++) {
2706       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2707     }
2708   }
2709   b = (Mat_MPIAIJ*)B->data;
2710 
2711   if (!B->preallocated) {
2712     /* Explicitly create 2 MATSEQAIJ matrices. */
2713     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2714     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2715     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2716     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2717     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2718     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2719     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2720     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2721   }
2722 
2723   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2724   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2725   B->preallocated = PETSC_TRUE;
2726   PetscFunctionReturn(0);
2727 }
2728 EXTERN_C_END
2729 
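/*
   Example (editorial sketch): typical preallocation of an MPIAIJ matrix before
   inserting values; d_nz/o_nz are per-row upper bounds for the diagonal (b->A)
   and off-diagonal (b->B) blocks created above, or exact per-row counts can be
   passed through d_nnz/o_nnz instead.

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(B,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/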
2730 #undef __FUNCT__
2731 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2732 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2733 {
2734   Mat            mat;
2735   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2736   PetscErrorCode ierr;
2737 
2738   PetscFunctionBegin;
2739   *newmat       = 0;
2740   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2741   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2742   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2743   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2744   a    = (Mat_MPIAIJ*)mat->data;
2745 
2746   mat->factor       = matin->factor;
2747   mat->rmap->bs      = matin->rmap->bs;
2748   mat->assembled    = PETSC_TRUE;
2749   mat->insertmode   = NOT_SET_VALUES;
2750   mat->preallocated = PETSC_TRUE;
2751 
2752   a->size           = oldmat->size;
2753   a->rank           = oldmat->rank;
2754   a->donotstash     = oldmat->donotstash;
2755   a->roworiented    = oldmat->roworiented;
2756   a->rowindices     = 0;
2757   a->rowvalues      = 0;
2758   a->getrowactive   = PETSC_FALSE;
2759 
2760   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2761   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2762 
2763   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2764   if (oldmat->colmap) {
2765 #if defined (PETSC_USE_CTABLE)
2766     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2767 #else
2768     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2769     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2770     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2771 #endif
2772   } else a->colmap = 0;
2773   if (oldmat->garray) {
2774     PetscInt len;
2775     len  = oldmat->B->cmap->n;
2776     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2777     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2778     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2779   } else a->garray = 0;
2780 
2781   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2782   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2783   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2784   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2785   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2786   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2787   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2788   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2789   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2790   *newmat = mat;
2791   PetscFunctionReturn(0);
2792 }
2793 
2794 #include "petscsys.h"
2795 
2796 #undef __FUNCT__
2797 #define __FUNCT__ "MatLoad_MPIAIJ"
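/*
   MatLoad_MPIAIJ - loads a matrix from a PETSc binary viewer. Rank 0 reads the
   file header, the row lengths, the column indices, and finally the numerical
   values, shipping each process the block of rows it owns; every process then
   assembles its local part. The binary layout assumed below is
   [MAT_FILE_COOKIE, M, N, nz][rowlengths][column indices][values].
*/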
2798 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2799 {
2800   Mat            A;
2801   PetscScalar    *vals,*svals;
2802   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2803   MPI_Status     status;
2804   PetscErrorCode ierr;
2805   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2806   PetscInt       i,nz,j,rstart,rend,mmax,maxnz;
2807   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2808   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2809   PetscInt       cend,cstart,n,*rowners;
2810   int            fd;
2811 
2812   PetscFunctionBegin;
2813   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2814   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2815   if (!rank) {
2816     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2817     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2818     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2819   }
2820 
2821   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2822   M = header[1]; N = header[2];
2823   /* determine ownership of all rows */
2824   m    = M/size + ((M % size) > rank);
2825   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2826   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2827 
2828   /* First process needs enough room for process with most rows */
2829   if (!rank) {
2830     mmax       = rowners[1];
2831     for (i=2; i<=size; i++) {
2832       mmax = PetscMax(mmax,rowners[i]);
2833     }
2834   } else mmax = m;
2835 
2836   rowners[0] = 0;
2837   for (i=2; i<=size; i++) {
2838     rowners[i] += rowners[i-1];
2839   }
2840   rstart = rowners[rank];
2841   rend   = rowners[rank+1];
2842 
2843   /* distribute row lengths to all processors */
2844   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2845   if (!rank) {
2846     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2847     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2848     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2849     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2850     for (j=0; j<m; j++) {
2851       procsnz[0] += ourlens[j];
2852     }
2853     for (i=1; i<size; i++) {
2854       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2855       /* calculate the number of nonzeros on each processor */
2856       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2857         procsnz[i] += rowlengths[j];
2858       }
2859       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2860       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2861     }
2862     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2863   } else {
2864     mpicnt = PetscMPIIntCast(m);
2865     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2866   }
2867 
2868   if (!rank) {
2869     /* determine max buffer needed and allocate it */
2870     maxnz = 0;
2871     for (i=0; i<size; i++) {
2872       maxnz = PetscMax(maxnz,procsnz[i]);
2873     }
2874     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2875 
2876     /* read in my part of the matrix column indices  */
2877     nz   = procsnz[0];
2878     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2879     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2880 
2881     /* read in everyone else's part and ship it off */
2882     for (i=1; i<size; i++) {
2883       nz     = procsnz[i];
2884       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2885       mpicnt = PetscMPIIntCast(nz);
2886       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2887     }
2888     ierr = PetscFree(cols);CHKERRQ(ierr);
2889   } else {
2890     /* determine buffer space needed for message */
2891     nz = 0;
2892     for (i=0; i<m; i++) {
2893       nz += ourlens[i];
2894     }
2895     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2896 
2897     /* receive message of column indices */
2898     mpicnt = PetscMPIIntCast(nz);
2899     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2900     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2901     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2902     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2903     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2904   }
2905 
2906   /* determine column ownership if matrix is not square */
2907   if (N != M) {
2908     n      = N/size + ((N % size) > rank);
2909     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2910     cstart = cend - n;
2911   } else {
2912     cstart = rstart;
2913     cend   = rend;
2914     n      = cend - cstart;
2915   }
2916 
2917   /* loop over local rows, determining number of off diagonal entries */
2918   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2919   jj = 0;
2920   for (i=0; i<m; i++) {
2921     for (j=0; j<ourlens[i]; j++) {
2922       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2923       jj++;
2924     }
2925   }
2926 
2927   /* create our matrix */
2928   for (i=0; i<m; i++) {
2929     ourlens[i] -= offlens[i];
2930   }
2931   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2932   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2933   ierr = MatSetType(A,type);CHKERRQ(ierr);
2934   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2935 
2936   for (i=0; i<m; i++) {
2937     ourlens[i] += offlens[i];
2938   }
2939 
2940   if (!rank) {
2941     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2942 
2943     /* read in my part of the matrix numerical values  */
2944     nz   = procsnz[0];
2945     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2946 
2947     /* insert into matrix */
2948     jj      = rstart;
2949     smycols = mycols;
2950     svals   = vals;
2951     for (i=0; i<m; i++) {
2952       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2953       smycols += ourlens[i];
2954       svals   += ourlens[i];
2955       jj++;
2956     }
2957 
2958     /* read in other processors and ship out */
2959     for (i=1; i<size; i++) {
2960       nz     = procsnz[i];
2961       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2962       mpicnt = PetscMPIIntCast(nz);
2963       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2964     }
2965     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2966   } else {
2967     /* receive numeric values */
2968     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2969 
2970     /* receive message of values */
2971     mpicnt = PetscMPIIntCast(nz);
2972     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2973     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
2974     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2975     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2976     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2977 
2978     /* insert into matrix */
2979     jj      = rstart;
2980     smycols = mycols;
2981     svals   = vals;
2982     for (i=0; i<m; i++) {
2983       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2984       smycols += ourlens[i];
2985       svals   += ourlens[i];
2986       jj++;
2987     }
2988   }
2989   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2990   ierr = PetscFree(vals);CHKERRQ(ierr);
2991   ierr = PetscFree(mycols);CHKERRQ(ierr);
2992   ierr = PetscFree(rowners);CHKERRQ(ierr);
2993 
2994   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2995   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2996   *newmat = A;
2997   PetscFunctionReturn(0);
2998 }
2999 
3000 #undef __FUNCT__
3001 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3002 /*
3003     Not ideal since it makes two copies of the submatrix: first each process
3004   extracts its piece as a local SeqAIJ matrix, and then those local matrices
3005   are concatenated into the final result. Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
3006 */
3007 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3008 {
3009   PetscErrorCode ierr;
3010   PetscMPIInt    rank,size;
3011   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3012   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3013   Mat            *local,M,Mreuse;
3014   MatScalar      *vwork,*aa;
3015   MPI_Comm       comm = ((PetscObject)mat)->comm;
3016   Mat_SeqAIJ     *aij;
3017 
3019   PetscFunctionBegin;
3020   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3021   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3022 
3023   if (call ==  MAT_REUSE_MATRIX) {
3024     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3025     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not created by a previous MAT_INITIAL_MATRIX call, cannot reuse it");
3026     local = &Mreuse;
3027     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3028   } else {
3029     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3030     Mreuse = *local;
3031     ierr   = PetscFree(local);CHKERRQ(ierr);
3032   }
3033 
3034   /*
3035       m - number of local rows
3036       n - number of columns (same on all processors)
3037       rstart - first row in new global matrix generated
3038   */
3039   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3040   if (call == MAT_INITIAL_MATRIX) {
3041     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3042     ii  = aij->i;
3043     jj  = aij->j;
3044 
3045     /*
3046         Determine the number of non-zeros in the diagonal and off-diagonal
3047         portions of the matrix in order to do correct preallocation
3048     */
3049 
3050     /* first get start and end of "diagonal" columns */
3051     if (csize == PETSC_DECIDE) {
3052       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3053       if (mglobal == n) { /* square matrix */
3054 	nlocal = m;
3055       } else {
3056         nlocal = n/size + ((n % size) > rank);
3057       }
3058     } else {
3059       nlocal = csize;
3060     }
3061     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3062     rstart = rend - nlocal;
3063     if (rank == size - 1 && rend != n) {
3064       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3065     }
3066 
3067     /* next, compute all the lengths */
3068     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3069     olens = dlens + m;
3070     for (i=0; i<m; i++) {
3071       jend = ii[i+1] - ii[i];
3072       olen = 0;
3073       dlen = 0;
3074       for (j=0; j<jend; j++) {
3075         if (*jj < rstart || *jj >= rend) olen++;
3076         else dlen++;
3077         jj++;
3078       }
3079       olens[i] = olen;
3080       dlens[i] = dlen;
3081     }
3082     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3083     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3084     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3085     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3086     ierr = PetscFree(dlens);CHKERRQ(ierr);
3087   } else {
3088     PetscInt ml,nl;
3089 
3090     M = *newmat;
3091     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3092     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3093     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3094     /*
3095          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3096        rather than the slower MatSetValues().
3097     */
3098     M->was_assembled = PETSC_TRUE;
3099     M->assembled     = PETSC_FALSE;
3100   }
3101   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3102   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3103   ii  = aij->i;
3104   jj  = aij->j;
3105   aa  = aij->a;
3106   for (i=0; i<m; i++) {
3107     row   = rstart + i;
3108     nz    = ii[i+1] - ii[i];
3109     cwork = jj;     jj += nz;
3110     vwork = aa;     aa += nz;
3111     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3112   }
3113 
3114   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3115   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3116   *newmat = M;
3117 
3118   /* save submatrix used in processor for next request */
3119   if (call ==  MAT_INITIAL_MATRIX) {
3120     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3121     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3122   }
3123 
3124   PetscFunctionReturn(0);
3125 }
3126 
3127 EXTERN_C_BEGIN
3128 #undef __FUNCT__
3129 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3130 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3131 {
3132   PetscInt       m,cstart, cend,j,nnz,i,d;
3133   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3134   const PetscInt *JJ;
3135   PetscScalar    *values;
3136   PetscErrorCode ierr;
3137 
3138   PetscFunctionBegin;
3139   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3140 
3141   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3142   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3143   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3144   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3145   m      = B->rmap->n;
3146   cstart = B->cmap->rstart;
3147   cend   = B->cmap->rend;
3148   rstart = B->rmap->rstart;
3149 
3150   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3151   o_nnz = d_nnz + m;
3152 
3153 #if defined(PETSC_USE_DEBUG)
3154   for (i=0; i<m; i++) {
3155     nnz     = Ii[i+1]- Ii[i];
3156     JJ      = J + Ii[i];
3157     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3158     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3159     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3160     for (j=1; j<nnz; j++) {
3161       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has an unsorted column index at position %D",i,j);
3162     }
3163   }
3164 #endif
3165 
3166   for (i=0; i<m; i++) {
3167     nnz     = Ii[i+1]- Ii[i];
3168     JJ      = J + Ii[i];
3169     nnz_max = PetscMax(nnz_max,nnz);
3170     for (j=0; j<nnz; j++) {
3171       if (*JJ >= cstart) break;
3172       JJ++;
3173     }
3174     d = 0;
3175     for (; j<nnz; j++) {
3176       if (*JJ++ >= cend) break;
3177       d++;
3178     }
3179     d_nnz[i] = d;
3180     o_nnz[i] = nnz - d;
3181   }
3182   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3183   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3184 
3185   if (v) values = (PetscScalar*)v;
3186   else {
3187     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3188     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3189   }
3190 
3191   for (i=0; i<m; i++) {
3192     ii   = i + rstart;
3193     nnz  = Ii[i+1]- Ii[i];
3194     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3195   }
3196   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3197   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3198 
3199   if (!v) {
3200     ierr = PetscFree(values);CHKERRQ(ierr);
3201   }
3202   PetscFunctionReturn(0);
3203 }
3204 EXTERN_C_END
3205 
3206 #undef __FUNCT__
3207 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3208 /*@
3209    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3210    (the default parallel PETSc format).
3211 
3212    Collective on MPI_Comm
3213 
3214    Input Parameters:
3215 +  B - the matrix
3216 .  i - the indices into j for the start of each local row (starts with zero)
3217 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3218 -  v - optional values in the matrix
3219 
3220    Level: developer
3221 
3222    Notes:
3223        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3224      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3225      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3226 
3227        The i and j indices are 0-based, and the entries of i are offsets into the local j array.
3228 
3229        The format used for the sparse matrix input is equivalent to a
3230     row-major ordering, i.e., for the following matrix the expected input data is
3231     as shown:
3232 
3233         1 0 0
3234         2 0 3     P0
3235        -------
3236         4 5 6     P1
3237 
3238      Process0 [P0]: rows_owned=[0,1]
3239         i =  {0,1,3}  [size = nrow+1  = 2+1]
3240         j =  {0,0,2}  [size = nz = 3]
3241         v =  {1,2,3}  [size = nz = 3]
3242 
3243      Process1 [P1]: rows_owned=[2]
3244         i =  {0,3}    [size = nrow+1  = 1+1]
3245         j =  {0,1,2}  [size = nz = 3]
3246         v =  {4,5,6}  [size = nz = 3]
3247 
3248       The column indices for each row MUST be sorted.
3249 
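      A minimal calling sketch (a sketch only, not a complete program; it
      assumes the local sizes m,n and the CSR triplet i,j,v have been built
      as in the example above):
.vb
      MatCreate(comm,&B);
      MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
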
3250 .keywords: matrix, aij, compressed row, sparse, parallel
3251 
3252 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3253           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3254 @*/
3255 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3256 {
3257   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3258 
3259   PetscFunctionBegin;
3260   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3261   if (f) {
3262     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3263   }
3264   PetscFunctionReturn(0);
3265 }
3266 
3267 #undef __FUNCT__
3268 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3269 /*@C
3270    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3271    (the default parallel PETSc format).  For good matrix assembly performance
3272    the user should preallocate the matrix storage by setting the parameters
3273    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3274    performance can be increased by more than a factor of 50.
3275 
3276    Collective on MPI_Comm
3277 
3278    Input Parameters:
3279 +  A - the matrix
3280 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3281            (same value is used for all local rows)
3282 .  d_nnz - array containing the number of nonzeros in the various rows of the
3283            DIAGONAL portion of the local submatrix (possibly different for each row)
3284            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3285            The size of this array is equal to the number of local rows, i.e 'm'.
3286            You must leave room for the diagonal entry even if it is zero.
3287 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3288            submatrix (same value is used for all local rows).
3289 -  o_nnz - array containing the number of nonzeros in the various rows of the
3290            OFF-DIAGONAL portion of the local submatrix (possibly different for
3291            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3292            structure. The size of this array is equal to the number
3293            of local rows, i.e 'm'.
3294 
3295    If the *_nnz parameter is given then the *_nz parameter is ignored
3296 
3297    The AIJ format (also called the Yale sparse matrix format or
3298    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3299    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3300 
3301    The parallel matrix is partitioned such that the first m0 rows belong to
3302    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3303    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3304 
3305    The DIAGONAL portion of the local submatrix of a processor can be defined
3306    as the submatrix obtained by extracting the part corresponding
3307    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3308    first row that belongs to the processor and r2 is the last row belonging
3309    to this processor. This is a square mxm matrix. The remaining portion
3310    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3311 
3312    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3313 
3314    You can call MatGetInfo() to get information on how effective the preallocation was;
3315    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3316    You can also run with the option -info and look for messages with the string
3317    malloc in them to see if additional memory allocation was needed.
3318 
3319    Example usage:
3320 
3321    Consider the following 8x8 matrix with 34 non-zero values that is
3322    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3323    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3324    as follows:
3325 
3326 .vb
3327             1  2  0  |  0  3  0  |  0  4
3328     Proc0   0  5  6  |  7  0  0  |  8  0
3329             9  0 10  | 11  0  0  | 12  0
3330     -------------------------------------
3331            13  0 14  | 15 16 17  |  0  0
3332     Proc1   0 18  0  | 19 20 21  |  0  0
3333             0  0  0  | 22 23  0  | 24  0
3334     -------------------------------------
3335     Proc2  25 26 27  |  0  0 28  | 29  0
3336            30  0  0  | 31 32 33  |  0 34
3337 .ve
3338 
3339    This can be represented as a collection of submatrices as:
3340 
3341 .vb
3342       A B C
3343       D E F
3344       G H I
3345 .ve
3346 
3347    where the submatrices A,B,C are owned by proc0, D,E,F are
3348    owned by proc1, G,H,I are owned by proc2.
3349 
3350    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3351    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3352    The 'M','N' parameters are 8,8, and have the same values on all procs.
3353 
3354    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3355    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3356    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3357    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3358    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3359    matrix and [DF] as another SeqAIJ matrix.
3360 
3361    When d_nz, o_nz parameters are specified, d_nz storage elements are
3362    allocated for every row of the local diagonal submatrix, and o_nz
3363    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3364    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3365    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3366    In this case, the values of d_nz,o_nz are:
3367 .vb
3368      proc0 : dnz = 2, o_nz = 2
3369      proc1 : dnz = 3, o_nz = 2
3370      proc2 : dnz = 1, o_nz = 4
3371 .ve
3372    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3373    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3374    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3375    34 values.
3376 
3377    When d_nnz, o_nnz parameters are specified, the storage is specified
3378    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3379    In the above case the values for d_nnz,o_nnz are:
3380 .vb
3381      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3382      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3383      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3384 .ve
3385    Here the space allocated is the sum of all the above values, i.e., 34, and
3386    hence the preallocation is perfect.
3387 
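   For the example above, proc0 could thus preallocate with (a sketch; it
   assumes B has already been created with 3 local rows on this process and
   type MATMPIAIJ):
.vb
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
      MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
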
3388    Level: intermediate
3389 
3390 .keywords: matrix, aij, compressed row, sparse, parallel
3391 
3392 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3393           MPIAIJ, MatGetInfo()
3394 @*/
3395 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3396 {
3397   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3398 
3399   PetscFunctionBegin;
3400   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3401   if (f) {
3402     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3403   }
3404   PetscFunctionReturn(0);
3405 }
3406 
3407 #undef __FUNCT__
3408 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3409 /*@
3410      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that
3411          contain the local rows in standard CSR format.
3412 
3413    Collective on MPI_Comm
3414 
3415    Input Parameters:
3416 +  comm - MPI communicator
3417 .  m - number of local rows (Cannot be PETSC_DECIDE)
3418 .  n - This value should be the same as the local size used in creating the
3419        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3420        calculated if N is given). For square matrices n is almost always m.
3421 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3422 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3423 .   i - row indices
3424 .   j - column indices
3425 -   a - matrix values
3426 
3427    Output Parameter:
3428 .   mat - the matrix
3429 
3430    Level: intermediate
3431 
3432    Notes:
3433        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3434      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3435      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3436 
3437        The i and j indices are 0-based, and the entries of i are offsets into the local j array.
3438 
3439        The format used for the sparse matrix input is equivalent to a
3440     row-major ordering, i.e., for the following matrix the expected input data is
3441     as shown:
3442 
3443         1 0 0
3444         2 0 3     P0
3445        -------
3446         4 5 6     P1
3447 
3448      Process0 [P0]: rows_owned=[0,1]
3449         i =  {0,1,3}  [size = nrow+1  = 2+1]
3450         j =  {0,0,2}  [size = nz = 3]
3451         v =  {1,2,3}  [size = nz = 3]
3452 
3453      Process1 [P1]: rows_owned=[2]
3454         i =  {0,3}    [size = nrow+1  = 1+1]
3455         j =  {0,1,2}  [size = nz = 3]
3456         v =  {4,5,6}  [size = nz = 3]
3457 
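      For the example above each process would call (a sketch; i, j, and v are
      that process's local arrays as listed):
.vb
      Mat A;
      MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,v,&A);
.ve
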
3458 .keywords: matrix, aij, compressed row, sparse, parallel
3459 
3460 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3461           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3462 @*/
3463 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3464 {
3465   PetscErrorCode ierr;
3466 
3467   PetscFunctionBegin;
3468   if (i[0]) {
3469     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3470   }
3471   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3472   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3473   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3474   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3475   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3476   PetscFunctionReturn(0);
3477 }
3478 
3479 #undef __FUNCT__
3480 #define __FUNCT__ "MatCreateMPIAIJ"
3481 /*@C
3482    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3483    (the default parallel PETSc format).  For good matrix assembly performance
3484    the user should preallocate the matrix storage by setting the parameters
3485    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3486    performance can be increased by more than a factor of 50.
3487 
3488    Collective on MPI_Comm
3489 
3490    Input Parameters:
3491 +  comm - MPI communicator
3492 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3493            This value should be the same as the local size used in creating the
3494            y vector for the matrix-vector product y = Ax.
3495 .  n - This value should be the same as the local size used in creating the
3496        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3497        calculated if N is given). For square matrices n is almost always m.
3498 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3499 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3500 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3501            (same value is used for all local rows)
3502 .  d_nnz - array containing the number of nonzeros in the various rows of the
3503            DIAGONAL portion of the local submatrix (possibly different for each row)
3504            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3505            The size of this array is equal to the number of local rows, i.e 'm'.
3506            You must leave room for the diagonal entry even if it is zero.
3507 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3508            submatrix (same value is used for all local rows).
3509 -  o_nnz - array containing the number of nonzeros in the various rows of the
3510            OFF-DIAGONAL portion of the local submatrix (possibly different for
3511            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3512            structure. The size of this array is equal to the number
3513            of local rows, i.e 'm'.
3514 
3515    Output Parameter:
3516 .  A - the matrix
3517 
3518    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3519    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3520    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3521 
3522    Notes:
3523    If the *_nnz parameter is given then the *_nz parameter is ignored
3524 
3525    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3526    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3527    storage requirements for this matrix.
3528 
3529    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3530    processor then it must be used on all processors that share the object for
3531    that argument.
3532 
3533    The user MUST specify either the local or global matrix dimensions
3534    (possibly both).
3535 
3536    The parallel matrix is partitioned across processors such that the
3537    first m0 rows belong to process 0, the next m1 rows belong to
3538    process 1, the next m2 rows belong to process 2, etc., where
3539    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
3540    values corresponding to an [m x N] submatrix.
3541 
3542    The columns are logically partitioned with the n0 columns belonging
3543    to the 0th partition, the next n1 columns belonging to the next
3544    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
3545 
3546    The DIAGONAL portion of the local submatrix on any given processor
3547    is the submatrix formed by the rows and columns m,n owned by
3548    that processor, i.e., the diagonal submatrix on
3549    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
3550    etc. The remaining portion of the local submatrix [m x (N-n)]
3551    constitutes the OFF-DIAGONAL portion. The example below better
3552    illustrates this concept.
3553 
3554    For a square global matrix we define each processor's diagonal portion
3555    to be its local rows and the corresponding columns (a square submatrix);
3556    each processor's off-diagonal portion encompasses the remainder of the
3557    local matrix (a rectangular submatrix).
3558 
3559    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3560 
3561    When calling this routine with a single process communicator, a matrix of
3562    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3563    type of communicator, use the construction mechanism:
3564      MatCreate(...,&A); MatSetSizes(A,m,n,M,N); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3565 
3566    By default, this format uses inodes (identical nodes) when possible.
3567    We search for consecutive rows with the same nonzero structure, thereby
3568    reusing matrix information to achieve increased efficiency.
3569 
3570    Options Database Keys:
3571 +  -mat_no_inode  - Do not use inodes
3572 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3573 -  -mat_aij_oneindex - Internally use indexing starting at 1
3574         rather than 0.  Note that when calling MatSetValues(),
3575         the user still MUST index entries starting at 0!
3576 
3578    Example usage:
3579 
3580    Consider the following 8x8 matrix with 34 non-zero values that is
3581    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3582    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3583    as follows:
3584 
3585 .vb
3586             1  2  0  |  0  3  0  |  0  4
3587     Proc0   0  5  6  |  7  0  0  |  8  0
3588             9  0 10  | 11  0  0  | 12  0
3589     -------------------------------------
3590            13  0 14  | 15 16 17  |  0  0
3591     Proc1   0 18  0  | 19 20 21  |  0  0
3592             0  0  0  | 22 23  0  | 24  0
3593     -------------------------------------
3594     Proc2  25 26 27  |  0  0 28  | 29  0
3595            30  0  0  | 31 32 33  |  0 34
3596 .ve
3597 
3598    This can be represented as a collection of submatrices as:
3599 
3600 .vb
3601       A B C
3602       D E F
3603       G H I
3604 .ve
3605 
3606    where the submatrices A,B,C are owned by proc0, D,E,F are
3607    owned by proc1, G,H,I are owned by proc2.
3608 
3609    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3610    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3611    The 'M','N' parameters are 8,8, and have the same values on all procs.
3612 
3613    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3614    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3615    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3616    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3617    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3618    matrix and [DF] as another SeqAIJ matrix.
3619 
3620    When d_nz, o_nz parameters are specified, d_nz storage elements are
3621    allocated for every row of the local diagonal submatrix, and o_nz
3622    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3623    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3624    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3625    In this case, the values of d_nz,o_nz are:
3626 .vb
3627      proc0 : dnz = 2, o_nz = 2
3628      proc1 : dnz = 3, o_nz = 2
3629      proc2 : dnz = 1, o_nz = 4
3630 .ve
3631    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3632    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3633    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3634    34 values.
3635 
3636    When d_nnz, o_nnz parameters are specified, the storage is specified
3637    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3638    In the above case the values for d_nnz,o_nnz are:
3639 .vb
3640      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3641      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3642      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3643 .ve
3644    Here the space allocated is the sum of all the above values, i.e., 34, and
3645    hence the preallocation is perfect.
3646 
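   A direct creation call for the example above could look like (a sketch;
   d_nnz and o_nnz are each process's arrays as listed above):
.vb
      Mat A;
      MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);
.ve
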
3647    Level: intermediate
3648 
3649 .keywords: matrix, aij, compressed row, sparse, parallel
3650 
3651 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3652           MPIAIJ, MatCreateMPIAIJWithArrays()
3653 @*/
3654 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3655 {
3656   PetscErrorCode ierr;
3657   PetscMPIInt    size;
3658 
3659   PetscFunctionBegin;
3660   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3661   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3662   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3663   if (size > 1) {
3664     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3665     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3666   } else {
3667     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3668     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3669   }
3670   PetscFunctionReturn(0);
3671 }
3672 
3673 #undef __FUNCT__
3674 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
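/*
   MatMPIAIJGetSeqAIJ - gives access to the local diagonal block (Ad), the local
   off-diagonal block (Ao), and the map from Ao's local column numbers to global
   column numbers (colmap) of an MPIAIJ matrix. No copies are made; the caller
   must not destroy the returned objects.
*/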
3675 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3676 {
3677   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3678 
3679   PetscFunctionBegin;
3680   *Ad     = a->A;
3681   *Ao     = a->B;
3682   *colmap = a->garray;
3683   PetscFunctionReturn(0);
3684 }
3685 
3686 #undef __FUNCT__
3687 #define __FUNCT__ "MatSetColoring_MPIAIJ"
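/*
   MatSetColoring_MPIAIJ - applies a coloring to both local SeqAIJ blocks. For
   IS_COLORING_GLOBAL the colors are gathered onto every process so the ghost
   columns of the off-diagonal block can be colored; for IS_COLORING_GHOSTED
   the ghosted global indices are first mapped to local ones with
   ISGlobalToLocalMappingApply().
*/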
3688 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3689 {
3690   PetscErrorCode ierr;
3691   PetscInt       i;
3692   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3693 
3694   PetscFunctionBegin;
3695   if (coloring->ctype == IS_COLORING_GLOBAL) {
3696     ISColoringValue *allcolors,*colors;
3697     ISColoring      ocoloring;
3698 
3699     /* set coloring for diagonal portion */
3700     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3701 
3702     /* set coloring for off-diagonal portion */
3703     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3704     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3705     for (i=0; i<a->B->cmap->n; i++) {
3706       colors[i] = allcolors[a->garray[i]];
3707     }
3708     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3709     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3710     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3711     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3712   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3713     ISColoringValue *colors;
3714     PetscInt        *larray;
3715     ISColoring      ocoloring;
3716 
3717     /* set coloring for diagonal portion */
3718     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3719     for (i=0; i<a->A->cmap->n; i++) {
3720       larray[i] = i + A->cmap->rstart;
3721     }
3722     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3723     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3724     for (i=0; i<a->A->cmap->n; i++) {
3725       colors[i] = coloring->colors[larray[i]];
3726     }
3727     ierr = PetscFree(larray);CHKERRQ(ierr);
3728     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3729     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3730     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3731 
3732     /* set coloring for off-diagonal portion */
3733     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3734     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3735     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3736     for (i=0; i<a->B->cmap->n; i++) {
3737       colors[i] = coloring->colors[larray[i]];
3738     }
3739     ierr = PetscFree(larray);CHKERRQ(ierr);
3740     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3741     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3742     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3743   } else {
3744     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3745   }
3746 
3747   PetscFunctionReturn(0);
3748 }
3749 
3750 #if defined(PETSC_HAVE_ADIC)
3751 #undef __FUNCT__
3752 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3753 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3754 {
3755   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3756   PetscErrorCode ierr;
3757 
3758   PetscFunctionBegin;
3759   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3760   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3761   PetscFunctionReturn(0);
3762 }
3763 #endif
3764 
3765 #undef __FUNCT__
3766 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3767 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3768 {
3769   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3770   PetscErrorCode ierr;
3771 
3772   PetscFunctionBegin;
3773   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3774   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3775   PetscFunctionReturn(0);
3776 }
3777 
3778 #undef __FUNCT__
3779 #define __FUNCT__ "MatMerge"
3780 /*@
3781       MatMerge - Creates a single large parallel PETSc matrix by concatenating the
3782                  sequential matrices from each process
3783 
3784     Collective on MPI_Comm
3785 
3786    Input Parameters:
3787 +    comm - the communicator the parallel matrix will live on
3788 .    inmat - the input sequential matrix (one per process)
3789 .    n - number of local columns (or PETSC_DECIDE)
3790 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3791 
3792    Output Parameter:
3793 .    outmat - the parallel matrix generated
3794 
3795     Level: advanced
3796 
3797    Notes: The number of columns of the input matrix on EACH process MUST be the same. The input matrix inmat is destroyed by this routine once it has been merged into outmat.
3798 
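    A minimal usage sketch (inmat is this process's sequential matrix; names
    are illustrative only):
.vb
      Mat outmat;
      MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve
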
3799 @*/
3800 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3801 {
3802   PetscErrorCode ierr;
3803   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3804   PetscInt       *indx;
3805   PetscScalar    *values;
3806 
3807   PetscFunctionBegin;
3808   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3809   if (scall == MAT_INITIAL_MATRIX){
3810     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3811     if (n == PETSC_DECIDE){
3812       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3813     }
3814     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3815     rstart -= m;
3816 
3817     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3818     for (i=0;i<m;i++) {
3819       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3820       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3821       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3822     }
3823     /* This routine will ONLY return MPIAIJ type matrix */
3824     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3825     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3826     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3827     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3828     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3829 
3830   } else if (scall == MAT_REUSE_MATRIX){
3831     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3832   } else {
3833     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3834   }
3835 
3836   for (i=0;i<m;i++) {
3837     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3838     Ii    = i + rstart;
3839     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3840     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3841   }
3842   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3843   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3844   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3845 
3846   PetscFunctionReturn(0);
3847 }
3848 
3849 #undef __FUNCT__
3850 #define __FUNCT__ "MatFileSplit"
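/*
   MatFileSplit - writes each process's local rows of A, as an m x N SeqAIJ
   matrix, to its own binary file named <outfile>.<rank>.
*/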
3851 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3852 {
3853   PetscErrorCode    ierr;
3854   PetscMPIInt       rank;
3855   PetscInt          m,N,i,rstart,nnz;
3856   size_t            len;
3857   const PetscInt    *indx;
3858   PetscViewer       out;
3859   char              *name;
3860   Mat               B;
3861   const PetscScalar *values;
3862 
3863   PetscFunctionBegin;
3864   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3865   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3866   /* Should this be the type of the diagonal block of A? */
3867   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3868   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3869   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3870   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3871   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3872   for (i=0;i<m;i++) {
3873     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3874     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3875     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3876   }
3877   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3878   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3879 
3880   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3881   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3882   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3883   sprintf(name,"%s.%d",outfile,rank);
3884   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3885   ierr = PetscFree(name);CHKERRQ(ierr);
3886   ierr = MatView(B,out);CHKERRQ(ierr);
3887   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3888   ierr = MatDestroy(B);CHKERRQ(ierr);
3889   PetscFunctionReturn(0);
3890 }
3891 
3892 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3893 #undef __FUNCT__
3894 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3895 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3896 {
3897   PetscErrorCode       ierr;
3898   Mat_Merge_SeqsToMPI  *merge;
3899   PetscContainer       container;
3900 
3901   PetscFunctionBegin;
3902   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3903   if (container) {
3904     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3905     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3906     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3907     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3908     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3909     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3910     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3911     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3912     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3913     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3914     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3915     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3916 
3917     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3918     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3919     ierr = PetscFree(merge);CHKERRQ(ierr);
3920   }
3921 
3922   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3923   PetscFunctionReturn(0);
3924 }
3925 
3926 #include "../src/mat/utils/freespace.h"
3927 #include "petscbt.h"
3928 
3929 #undef __FUNCT__
3930 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3931 /*@C
3932       MatMerge_SeqsToMPINumeric - Sets the numerical values of an MPIAIJ matrix
3933                  created by MatMerge_SeqsToMPISymbolic(), by adding together the
3934                  corresponding rows of the sequential matrices from each process
3935 
3936     Collective on Mat
3937 
3938    Input Parameter:
3939 .    seqmat - the input sequential matrix
3940 
3941    Output Parameter:
3942 .    mpimat - the parallel matrix created by MatMerge_SeqsToMPISymbolic(),
3943               whose numerical values are set by this routine
3946 
3947     Level: advanced
3948 
3949    Notes:
3950      The dimensions of the sequential matrix on each process MUST be the same.
3951      The input seqmat is kept in the container composed on mpimat as "MatMergeSeqsToMPI" and will be
3952      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
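
     A typical usage sketch (each process supplies its own seqmat, built on
     PETSC_COMM_SELF; names are illustrative only):
.vb
      Mat mpimat;
      MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);
      MatMerge_SeqsToMPINumeric(seqmat,mpimat);
.ve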
3953 @*/
3954 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3955 {
3956   PetscErrorCode       ierr;
3957   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3958   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3959   PetscMPIInt          size,rank,taga,*len_s;
3960   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
3961   PetscInt             proc,m;
3962   PetscInt             **buf_ri,**buf_rj;
3963   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3964   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3965   MPI_Request          *s_waits,*r_waits;
3966   MPI_Status           *status;
3967   MatScalar            *aa=a->a;
3968   MatScalar            **abuf_r,*ba_i;
3969   Mat_Merge_SeqsToMPI  *merge;
3970   PetscContainer       container;
3971 
3972   PetscFunctionBegin;
3973   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3974 
3975   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3976   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3977 
3978   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3979   if (container) {
3980     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3981   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix was not created by MatMerge_SeqsToMPISymbolic()");
3982   bi     = merge->bi;
3983   bj     = merge->bj;
3984   buf_ri = merge->buf_ri;
3985   buf_rj = merge->buf_rj;
3986 
3987   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3988   owners = merge->rowmap.range;
3989   len_s  = merge->len_s;
3990 
3991   /* send and recv matrix values */
3992   /*-----------------------------*/
3993   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3994   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3995 
3996   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3997   for (proc=0,k=0; proc<size; proc++){
3998     if (!len_s[proc]) continue;
3999     i = owners[proc];
4000     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4001     k++;
4002   }
4003 
4004   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4005   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4006   ierr = PetscFree(status);CHKERRQ(ierr);
4007 
4008   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4009   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4010 
4011   /* insert mat values of mpimat */
4012   /*----------------------------*/
4013   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4014   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4015   nextrow = buf_ri_k + merge->nrecv;
4016   nextai  = nextrow + merge->nrecv;
4017 
4018   for (k=0; k<merge->nrecv; k++){
4019     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4020     nrows = *(buf_ri_k[k]);
4021     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4022     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4023   }
4024 
4025   /* set values of ba */
4026   m = merge->rowmap.n;
4027   for (i=0; i<m; i++) {
4028     arow = owners[rank] + i;
4029     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4030     bnzi = bi[i+1] - bi[i];
4031     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4032 
4033     /* add local non-zero vals of this proc's seqmat into ba */
4034     anzi = ai[arow+1] - ai[arow];
4035     aj   = a->j + ai[arow];
4036     aa   = a->a + ai[arow];
4037     nextaj = 0;
4038     for (j=0; nextaj<anzi; j++){ /* bj_i is a sorted superset of this row of aj, so every aj entry gets matched */
4039       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4040         ba_i[j] += aa[nextaj++];
4041       }
4042     }
4043 
4044     /* add received vals into ba */
4045     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4046       /* i-th row */
4047       if (i == *nextrow[k]) {
4048         anzi = *(nextai[k]+1) - *nextai[k];
4049         aj   = buf_rj[k] + *(nextai[k]);
4050         aa   = abuf_r[k] + *(nextai[k]);
4051         nextaj = 0;
4052         for (j=0; nextaj<anzi; j++){
4053           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4054             ba_i[j] += aa[nextaj++];
4055           }
4056         }
4057         nextrow[k]++; nextai[k]++;
4058       }
4059     }
4060     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4061   }
4062   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4063   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4064 
4065   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4066   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4067   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4068   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4069   PetscFunctionReturn(0);
4070 }
4071 
4072 #undef __FUNCT__
4073 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4074 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4075 {
4076   PetscErrorCode       ierr;
4077   Mat                  B_mpi;
4078   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4079   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4080   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4081   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4082   PetscInt             len,proc,*dnz,*onz;
4083   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4084   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4085   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4086   MPI_Status           *status;
4087   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4088   PetscBT              lnkbt;
4089   Mat_Merge_SeqsToMPI  *merge;
4090   PetscContainer       container;
4091 
4092   PetscFunctionBegin;
4093   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4094 
4095   /* make sure it is a PETSc comm */
4096   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4097   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4098   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4099 
4100   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4101   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4102 
4103   /* determine row ownership */
4104   /*---------------------------------------------------------*/
4105   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4106   merge->rowmap.n = m;
4107   merge->rowmap.N = M;
4108   merge->rowmap.bs = 1;
4109   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4110   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4111   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4112 
4113   m      = merge->rowmap.n;
4114   M      = merge->rowmap.N;
4115   owners = merge->rowmap.range;
4116 
4117   /* determine the number of messages to send, their lengths */
4118   /*---------------------------------------------------------*/
4119   len_s  = merge->len_s;
4120 
4121   len = 0;  /* length of buf_si[] */
4122   merge->nsend = 0;
4123   for (proc=0; proc<size; proc++){
4124     len_si[proc] = 0;
4125     if (proc == rank){
4126       len_s[proc] = 0;
4127     } else {
4128       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4129       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4130     }
4131     if (len_s[proc]) {
4132       merge->nsend++;
4133       nrows = 0;
4134       for (i=owners[proc]; i<owners[proc+1]; i++){
4135         if (ai[i+1] > ai[i]) nrows++;
4136       }
4137       len_si[proc] = 2*(nrows+1);
4138       len += len_si[proc];
4139     }
4140   }
4141 
4142   /* determine the number and length of messages to receive for ij-structure */
4143   /*-------------------------------------------------------------------------*/
4144   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4145   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4146 
4147   /* post the Irecv of j-structure */
4148   /*-------------------------------*/
4149   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4150   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4151 
4152   /* post the Isend of j-structure */
4153   /*--------------------------------*/
4154   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4155   sj_waits = si_waits + merge->nsend;
4156 
4157   for (proc=0, k=0; proc<size; proc++){
4158     if (!len_s[proc]) continue;
4159     i = owners[proc];
4160     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4161     k++;
4162   }
4163 
4164   /* receives and sends of j-structure are complete */
4165   /*------------------------------------------------*/
4166   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4167   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4168 
4169   /* send and recv i-structure */
4170   /*---------------------------*/
4171   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4172   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4173 
4174   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4175   buf_si = buf_s;  /* points to the beginning of the next outgoing msg */
4176   for (proc=0,k=0; proc<size; proc++){
4177     if (!len_s[proc]) continue;
4178     /* form outgoing message for i-structure:
4179          buf_si[0]:                 nrows to be sent
4180                [1:nrows]:           row index (global)
4181                [nrows+1:2*nrows+1]: i-structure index
4182     */
4183     /*-------------------------------------------*/
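    /* For example (hypothetical data): two nonempty rows with local indices 2 and 5,
       carrying 3 and 4 nonzeros respectively, are packed as
           buf_si[] = {2,  2,5,  0,3,7}
       i.e. nrows, then the local row indices, then the cumulative nonzero counts,
       for a total of len_si[proc] = 2*(nrows+1) = 6 entries */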
4184     nrows = len_si[proc]/2 - 1;
4185     buf_si_i    = buf_si + nrows+1;
4186     buf_si[0]   = nrows;
4187     buf_si_i[0] = 0;
4188     nrows = 0;
4189     for (i=owners[proc]; i<owners[proc+1]; i++){
4190       anzi = ai[i+1] - ai[i];
4191       if (anzi) {
4192         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4193         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4194         nrows++;
4195       }
4196     }
4197     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4198     k++;
4199     buf_si += len_si[proc];
4200   }
4201 
4202   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4203   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4204 
4205   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4206   for (i=0; i<merge->nrecv; i++){
4207     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4208   }
4209 
4210   ierr = PetscFree(len_si);CHKERRQ(ierr);
4211   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4212   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4213   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4214   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4215   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4216   ierr = PetscFree(status);CHKERRQ(ierr);
4217 
4218   /* compute a local seq matrix in each processor */
4219   /*----------------------------------------------*/
4220   /* allocate bi array and free space for accumulating nonzero column info */
4221   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4222   bi[0] = 0;
4223 
4224   /* create and initialize a linked list */
4225   nlnk = N+1;
4226   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4227 
4228   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4229   len = 0;
4230   len  = ai[owners[rank+1]] - ai[owners[rank]];
4231   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4232   current_space = free_space;
4233 
4234   /* determine symbolic info for each local row */
4235   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4236   nextrow = buf_ri_k + merge->nrecv;
4237   nextai  = nextrow + merge->nrecv;
4238   for (k=0; k<merge->nrecv; k++){
4239     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4240     nrows = *buf_ri_k[k];
4241     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4242     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure (cumulative nonzero counts) of the k-th recved message */
4243   }
4244 
4245   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4246   len = 0;
4247   for (i=0;i<m;i++) {
4248     bnzi   = 0;
4249     /* add local non-zero cols of this proc's seqmat into lnk */
4250     arow   = owners[rank] + i;
4251     anzi   = ai[arow+1] - ai[arow];
4252     aj     = a->j + ai[arow];
4253     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4254     bnzi += nlnk;
4255     /* add received col data into lnk */
4256     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4257       if (i == *nextrow[k]) { /* i-th row */
4258         anzi = *(nextai[k]+1) - *nextai[k];
4259         aj   = buf_rj[k] + *nextai[k];
4260         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4261         bnzi += nlnk;
4262         nextrow[k]++; nextai[k]++;
4263       }
4264     }
4265     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4266 
4267     /* if free space is not available, make more free space */
4268     if (current_space->local_remaining<bnzi) {
4269       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4270       nspacedouble++;
4271     }
4272     /* copy data into free space, then initialize lnk */
4273     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4274     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4275 
4276     current_space->array           += bnzi;
4277     current_space->local_used      += bnzi;
4278     current_space->local_remaining -= bnzi;
4279 
4280     bi[i+1] = bi[i] + bnzi;
4281   }
4282 
4283   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4284 
4285   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4286   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4287   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4288 
4289   /* create symbolic parallel matrix B_mpi */
4290   /*---------------------------------------*/
4291   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4292   if (n==PETSC_DECIDE) {
4293     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); /* local n unknown, so the global N must be supplied */
4294   } else {
4295     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); /* local n given, so the globals can be computed */
4296   }
4297   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4298   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4299   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4300 
4301   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4302   B_mpi->assembled     = PETSC_FALSE;
4303   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4304   merge->bi            = bi;
4305   merge->bj            = bj;
4306   merge->buf_ri        = buf_ri;
4307   merge->buf_rj        = buf_rj;
4308   merge->coi           = PETSC_NULL;
4309   merge->coj           = PETSC_NULL;
4310   merge->owners_co     = PETSC_NULL;
4311 
4312   /* attach the supporting struct to B_mpi for reuse */
4313   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4314   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4315   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4316   *mpimat = B_mpi;
4317 
4318   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4319   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4320   PetscFunctionReturn(0);
4321 }
4322 
4323 #undef __FUNCT__
4324 #define __FUNCT__ "MatMerge_SeqsToMPI"
4325 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4326 {
4327   PetscErrorCode   ierr;
4328 
4329   PetscFunctionBegin;
4330   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4331   if (scall == MAT_INITIAL_MATRIX){
4332     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4333   }
4334   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4335   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4336   PetscFunctionReturn(0);
4337 }
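/*
   A minimal usage sketch for MatMerge_SeqsToMPI() (the names Aseq, mlocal and Ampi are
   hypothetical): each process supplies an M-by-N SeqAIJ matrix holding only the rows it
   contributes (entries contributed by several processes are summed), and the pieces are
   merged into one parallel MPIAIJ matrix.

     Mat      Aseq,Ampi;
     PetscInt mlocal;                  number of rows this process will own in Ampi

     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,Aseq,mlocal,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
     ... change the numerical values (but not the structure) of Aseq ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,Aseq,mlocal,PETSC_DECIDE,MAT_REUSE_MATRIX,&Ampi);CHKERRQ(ierr);
*/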
4338 
4339 #undef __FUNCT__
4340 #define __FUNCT__ "MatGetLocalMat"
4341 /*@
4342      MatGetLocalMat - Creates a SeqAIJ matrix from the local rows of a parallel AIJ matrix
4343
4344     Not Collective
4345
4346    Input Parameters:
4347 +    A - the matrix
4348 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4349 
4350    Output Parameter:
4351 .    A_loc - the local sequential matrix generated
4352 
4353     Level: developer
4354 
4355 @*/
4356 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4357 {
4358   PetscErrorCode  ierr;
4359   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4360   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4361   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4362   MatScalar       *aa=a->a,*ba=b->a,*cam;
4363   PetscScalar     *ca;
4364   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4365   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4366 
4367   PetscFunctionBegin;
4368   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4369   if (scall == MAT_INITIAL_MATRIX){
4370     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4371     ci[0] = 0;
4372     for (i=0; i<am; i++){
4373       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4374     }
4375     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4376     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4377     k = 0;
4378     for (i=0; i<am; i++) {
4379       ncols_o = bi[i+1] - bi[i];
4380       ncols_d = ai[i+1] - ai[i];
4381       /* off-diagonal portion of A */
4382       for (jo=0; jo<ncols_o; jo++) {
4383         col = cmap[*bj];
4384         if (col >= cstart) break;
4385         cj[k]   = col; bj++;
4386         ca[k++] = *ba++;
4387       }
4388       /* diagonal portion of A */
4389       for (j=0; j<ncols_d; j++) {
4390         cj[k]   = cstart + *aj++;
4391         ca[k++] = *aa++;
4392       }
4393       /* off-diagonal portion of A */
4394       for (j=jo; j<ncols_o; j++) {
4395         cj[k]   = cmap[*bj++];
4396         ca[k++] = *ba++;
4397       }
4398     }
4399     /* put together the new matrix */
4400     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4401     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4402     /* Since these are PETSc arrays, change flags to free them as necessary. */
4403     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4404     mat->free_a  = PETSC_TRUE;
4405     mat->free_ij = PETSC_TRUE;
4406     mat->nonew   = 0;
4407   } else if (scall == MAT_REUSE_MATRIX){
4408     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4409     ci = mat->i; cj = mat->j; cam = mat->a;
4410     for (i=0; i<am; i++) {
4411       /* off-diagonal portion of A */
4412       ncols_o = bi[i+1] - bi[i];
4413       for (jo=0; jo<ncols_o; jo++) {
4414         col = cmap[*bj];
4415         if (col >= cstart) break;
4416         *cam++ = *ba++; bj++;
4417       }
4418       /* diagonal portion of A */
4419       ncols_d = ai[i+1] - ai[i];
4420       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4421       /* off-diagonal portion of A */
4422       for (j=jo; j<ncols_o; j++) {
4423         *cam++ = *ba++; bj++;
4424       }
4425     }
4426   } else {
4427     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4428   }
4429 
4430   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4431   PetscFunctionReturn(0);
4432 }
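/*
   A usage sketch for MatGetLocalMat() (A is an assembled MPIAIJ matrix; A_loc is
   hypothetical): the MAT_REUSE_MATRIX call refreshes the values in place, assuming
   the nonzero pattern of A has not changed.

     Mat A_loc;

     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc; later, after the values of A change ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/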
4433 
4434 #undef __FUNCT__
4435 #define __FUNCT__ "MatGetLocalMatCondensed"
4436 /*@C
4437      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and the NON-ZERO columns of a parallel AIJ matrix
4438 
4439     Not Collective
4440 
4441    Input Parameters:
4442 +    A - the matrix
4443 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4444 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4445 
4446    Output Parameter:
4447 .    A_loc - the local sequential matrix generated
4448 
4449     Level: developer
4450 
4451 @*/
4452 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4453 {
4454   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4455   PetscErrorCode    ierr;
4456   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4457   IS                isrowa,iscola;
4458   Mat               *aloc;
4459 
4460   PetscFunctionBegin;
4461   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4462   if (!row){
4463     start = A->rmap->rstart; end = A->rmap->rend;
4464     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4465   } else {
4466     isrowa = *row;
4467   }
4468   if (!col){
4469     start = A->cmap->rstart;
4470     cmap  = a->garray;
4471     nzA   = a->A->cmap->n;
4472     nzB   = a->B->cmap->n;
4473     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4474     ncols = 0;
4475     for (i=0; i<nzB; i++) {
4476       if (cmap[i] < start) idx[ncols++] = cmap[i];
4477       else break;
4478     }
4479     imark = i;
4480     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4481     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4482     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4483     ierr = PetscFree(idx);CHKERRQ(ierr);
4484   } else {
4485     iscola = *col;
4486   }
4487   if (scall != MAT_INITIAL_MATRIX){
4488     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4489     aloc[0] = *A_loc;
4490   }
4491   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4492   *A_loc = aloc[0];
4493   ierr = PetscFree(aloc);CHKERRQ(ierr);
4494   if (!row){
4495     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4496   }
4497   if (!col){
4498     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4499   }
4500   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4501   PetscFunctionReturn(0);
4502 }
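/*
   A usage sketch for MatGetLocalMatCondensed() (A_loc is hypothetical): passing
   PETSC_NULL for row and col selects all local rows and all locally referenced
   columns of A.

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
*/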
4503 
4504 #undef __FUNCT__
4505 #define __FUNCT__ "MatGetBrowsOfAcols"
4506 /*@C
4507     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4508 
4509     Collective on Mat
4510 
4511    Input Parameters:
4512 +    A,B - the matrices in mpiaij format
4513 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4514 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4515 
4516    Output Parameters:
4517 +    rowb, colb - index sets of rows and columns of B to extract
4518 .    brstart - index of the first row of B_seq that is taken from B's own local rows
4519 -    B_seq - the sequential matrix generated
4520 
4521     Level: developer
4522 
4523 @*/
4524 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4525 {
4526   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4527   PetscErrorCode    ierr;
4528   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4529   IS                isrowb,iscolb;
4530   Mat               *bseq;
4531 
4532   PetscFunctionBegin;
4533   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4534     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4535   }
4536   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4537 
4538   if (scall == MAT_INITIAL_MATRIX){
4539     start = A->cmap->rstart;
4540     cmap  = a->garray;
4541     nzA   = a->A->cmap->n;
4542     nzB   = a->B->cmap->n;
4543     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4544     ncols = 0;
4545     for (i=0; i<nzB; i++) {  /* rows of B (= cols of A) with global index below the local range */
4546       if (cmap[i] < start) idx[ncols++] = cmap[i];
4547       else break;
4548     }
4549     imark = i;
4550     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4551     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B with global index above the local range */
4552     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4553     ierr = PetscFree(idx);CHKERRQ(ierr);
4554     *brstart = imark;
4555     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4556   } else {
4557     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4558     isrowb = *rowb; iscolb = *colb;
4559     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4560     bseq[0] = *B_seq;
4561   }
4562   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4563   *B_seq = bseq[0];
4564   ierr = PetscFree(bseq);CHKERRQ(ierr);
4565   if (!rowb){
4566     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4567   } else {
4568     *rowb = isrowb;
4569   }
4570   if (!colb){
4571     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4572   } else {
4573     *colb = iscolb;
4574   }
4575   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4576   PetscFunctionReturn(0);
4577 }
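/*
   A usage sketch for MatGetBrowsOfAcols() (all variable names hypothetical): the
   index sets created by the first call are passed back in so that a MAT_REUSE_MATRIX
   call can extract the same rows again after the values of B change.

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/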
4578 
4579 #undef __FUNCT__
4580 #define __FUNCT__ "MatGetBrowsOfAoCols"
4581 /*@C
4582     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4583     of the OFF-DIAGONAL portion of the local part of A
4584 
4585     Collective on Mat
4586 
4587    Input Parameters:
4588 +    A,B - the matrices in mpiaij format
4589 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4590 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4591 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4592 
4593    Output Parameter:
4594 .    B_oth - the sequential matrix generated
4595 
4596     Level: developer
4597 
4598 @*/
4599 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4600 {
4601   VecScatter_MPI_General *gen_to,*gen_from;
4602   PetscErrorCode         ierr;
4603   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4604   Mat_SeqAIJ             *b_oth;
4605   VecScatter             ctx=a->Mvctx;
4606   MPI_Comm               comm=((PetscObject)ctx)->comm;
4607   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4608   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4609   PetscScalar            *rvalues,*svalues;
4610   MatScalar              *b_otha,*bufa,*bufA;
4611   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4612   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4613   MPI_Status             *sstatus,rstatus;
4614   PetscMPIInt            jj;
4615   PetscInt               *cols,sbs,rbs;
4616   PetscScalar            *vals;
4617 
4618   PetscFunctionBegin;
4619   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4620     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4621   }
4622   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4623   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4624 
4625   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4626   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4627   rvalues  = gen_from->values; /* holds the length of receiving row */
4628   svalues  = gen_to->values;   /* holds the length of sending row */
4629   nrecvs   = gen_from->n;
4630   nsends   = gen_to->n;
4631 
4632   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4633   srow     = gen_to->indices;   /* local row index to be sent */
4634   sstarts  = gen_to->starts;
4635   sprocs   = gen_to->procs;
4636   sstatus  = gen_to->sstatus;
4637   sbs      = gen_to->bs;
4638   rstarts  = gen_from->starts;
4639   rprocs   = gen_from->procs;
4640   rbs      = gen_from->bs;
4641 
4642   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4643   if (scall == MAT_INITIAL_MATRIX){
4644     /* i-array */
4645     /*---------*/
4646     /*  post receives */
4647     for (i=0; i<nrecvs; i++){
4648       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4649       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4650       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4651     }
4652 
4653     /* pack the outgoing message */
4654     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4655     rstartsj = sstartsj + nsends +1;
4656     sstartsj[0] = 0;  rstartsj[0] = 0;
4657     len = 0; /* total length of j or a array to be sent */
4658     k = 0;
4659     for (i=0; i<nsends; i++){
4660       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4661       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4662       for (j=0; j<nrows; j++) {
4663         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4664         for (l=0; l<sbs; l++){
4665           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4666           rowlen[j*sbs+l] = ncols;
4667           len += ncols;
4668           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4669         }
4670         k++;
4671       }
4672       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4673       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4674     }
4675     /* recvs and sends of i-array are completed */
4676     i = nrecvs;
4677     while (i--) {
4678       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4679     }
4680     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4681 
4682     /* allocate buffers for sending j and a arrays */
4683     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4684     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4685 
4686     /* create i-array of B_oth */
4687     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4688     b_othi[0] = 0;
4689     len = 0; /* total length of j or a array to be received */
4690     k = 0;
4691     for (i=0; i<nrecvs; i++){
4692       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4693       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4694       for (j=0; j<nrows; j++) {
4695         b_othi[k+1] = b_othi[k] + rowlen[j];
4696         len += rowlen[j]; k++;
4697       }
4698       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4699     }
4700 
4701     /* allocate space for j and a arrays of B_oth */
4702     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4703     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4704 
4705     /* j-array */
4706     /*---------*/
4707     /*  post receives of j-array */
4708     for (i=0; i<nrecvs; i++){
4709       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4710       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4711     }
4712 
4713     /* pack the outgoing message j-array */
4714     k = 0;
4715     for (i=0; i<nsends; i++){
4716       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4717       bufJ = bufj+sstartsj[i];
4718       for (j=0; j<nrows; j++) {
4719         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4720         for (ll=0; ll<sbs; ll++){
4721           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4722           for (l=0; l<ncols; l++){
4723             *bufJ++ = cols[l];
4724           }
4725           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4726         }
4727       }
4728       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4729     }
4730 
4731     /* recvs and sends of j-array are completed */
4732     i = nrecvs;
4733     while (i--) {
4734       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4735     }
4736     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4737   } else if (scall == MAT_REUSE_MATRIX){
4738     sstartsj = *startsj;
4739     rstartsj = sstartsj + nsends +1;
4740     bufa     = *bufa_ptr;
4741     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4742     b_otha   = b_oth->a;
4743   } else {
4744     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4745   }
4746 
4747   /* a-array */
4748   /*---------*/
4749   /*  post receives of a-array */
4750   for (i=0; i<nrecvs; i++){
4751     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4752     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4753   }
4754 
4755   /* pack the outgoing message a-array */
4756   k = 0;
4757   for (i=0; i<nsends; i++){
4758     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4759     bufA = bufa+sstartsj[i];
4760     for (j=0; j<nrows; j++) {
4761       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4762       for (ll=0; ll<sbs; ll++){
4763         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4764         for (l=0; l<ncols; l++){
4765           *bufA++ = vals[l];
4766         }
4767         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4768       }
4769     }
4770     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4771   }
4772   /* recvs and sends of a-array are completed */
4773   i = nrecvs;
4774   while (i--) {
4775     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4776   }
4777   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4778   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4779 
4780   if (scall == MAT_INITIAL_MATRIX){
4781     /* put together the new matrix */
4782     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4783 
4784     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4785     /* Since these are PETSc arrays, change flags to free them as necessary. */
4786     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4787     b_oth->free_a  = PETSC_TRUE;
4788     b_oth->free_ij = PETSC_TRUE;
4789     b_oth->nonew   = 0;
4790 
4791     ierr = PetscFree(bufj);CHKERRQ(ierr);
4792     if (!startsj || !bufa_ptr){
4793       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4794       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself, not the (possibly NULL) output pointer */
4795     } else {
4796       *startsj  = sstartsj;
4797       *bufa_ptr = bufa;
4798     }
4799   }
4800   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4801   PetscFunctionReturn(0);
4802 }
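/*
   A usage sketch for MatGetBrowsOfAoCols() (variable names hypothetical): startsj
   and bufa are created by the first call and passed back in so that a
   MAT_REUSE_MATRIX call can refill B_oth without repeating the symbolic
   communication setup.

     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa    = PETSC_NULL;
     Mat       B_oth;

     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
*/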
4803 
4804 #undef __FUNCT__
4805 #define __FUNCT__ "MatGetCommunicationStructs"
4806 /*@C
4807   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4808 
4809   Not Collective
4810 
4811   Input Parameter:
4812 . A - The matrix in mpiaij format
4813
4814   Output Parameters:
4815 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4816 . colmap - A map from global column index to local index into lvec
4817 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4818 
4819   Level: developer
4820 
4821 @*/
4822 #if defined (PETSC_USE_CTABLE)
4823 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4824 #else
4825 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4826 #endif
4827 {
4828   Mat_MPIAIJ *a;
4829 
4830   PetscFunctionBegin;
4831   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4832   PetscValidPointer(lvec, 2);
4833   PetscValidPointer(colmap, 3);
4834   PetscValidPointer(multScatter, 4);
4835   a = (Mat_MPIAIJ *) A->data;
4836   if (lvec) *lvec = a->lvec;
4837   if (colmap) *colmap = a->colmap;
4838   if (multScatter) *multScatter = a->Mvctx;
4839   PetscFunctionReturn(0);
4840 }
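/*
   A usage sketch for MatGetCommunicationStructs() (variable names hypothetical):
   peek at the scatter and colmap PETSc uses for MatMult() with an MPIAIJ matrix.

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/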
4841 
4842 EXTERN_C_BEGIN
4843 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4844 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4845 EXTERN_C_END
4846 
4847 #include "../src/mat/impls/dense/mpi/mpidense.h"
4848 
4849 #undef __FUNCT__
4850 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4851 /*
4852     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
4853 
4854                n                       p                          p
4855         (              )       (              )         (                  )
4856       m (      A       )  *  n (       B      )   =   m (         C        )
4857         (              )       (              )         (                  )
4858 
4859 */
4860 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4861 {
4862   PetscErrorCode     ierr;
4863   Mat                At,Bt,Ct;
4864 
4865   PetscFunctionBegin;
4866   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4867   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4868   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4869   ierr = MatDestroy(At);CHKERRQ(ierr);
4870   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4871   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4872   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4873   PetscFunctionReturn(0);
4874 }
4875 
4876 #undef __FUNCT__
4877 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4878 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4879 {
4880   PetscErrorCode ierr;
4881   PetscInt       m=A->rmap->n,n=B->cmap->n;
4882   Mat            Cmat;
4883 
4884   PetscFunctionBegin;
4885   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4886   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4887   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4888   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4889   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4890   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4891   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4892   *C   = Cmat;
4893   PetscFunctionReturn(0);
4894 }
4895 
4896 /* ----------------------------------------------------------------*/
4897 #undef __FUNCT__
4898 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4899 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4900 {
4901   PetscErrorCode ierr;
4902 
4903   PetscFunctionBegin;
4904   if (scall == MAT_INITIAL_MATRIX){
4905     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4906   }
4907   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4908   PetscFunctionReturn(0);
4909 }
4910 
4911 EXTERN_C_BEGIN
4912 #if defined(PETSC_HAVE_MUMPS)
4913 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4914 #endif
4915 #if defined(PETSC_HAVE_PASTIX)
4916 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4917 #endif
4918 #if defined(PETSC_HAVE_SUPERLU_DIST)
4919 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4920 #endif
4921 #if defined(PETSC_HAVE_SPOOLES)
4922 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4923 #endif
4924 EXTERN_C_END
4925 
4926 /*MC
4927    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4928 
4929    Options Database Keys:
4930 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4931 
4932   Level: beginner
4933 
4934 .seealso: MatCreateMPIAIJ()
4935 M*/
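/*
   For example (a sketch), creating a 100 x 100 parallel AIJ matrix whose type can
   still be overridden from the options database (run with -mat_type mpiaij):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/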
4936 
4937 EXTERN_C_BEGIN
4938 #undef __FUNCT__
4939 #define __FUNCT__ "MatCreate_MPIAIJ"
4940 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4941 {
4942   Mat_MPIAIJ     *b;
4943   PetscErrorCode ierr;
4944   PetscMPIInt    size;
4945 
4946   PetscFunctionBegin;
4947   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4948 
4949   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4950   B->data         = (void*)b;
4951   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4952   B->rmap->bs      = 1;
4953   B->assembled    = PETSC_FALSE;
4954   B->mapping      = 0;
4955 
4956   B->insertmode      = NOT_SET_VALUES;
4957   b->size            = size;
4958   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4959 
4960   /* build stash for off-process entries generated during assembly */
4961   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4962   b->donotstash  = PETSC_FALSE;
4963   b->colmap      = 0;
4964   b->garray      = 0;
4965   b->roworiented = PETSC_TRUE;
4966 
4967   /* stuff used for matrix vector multiply */
4968   b->lvec      = PETSC_NULL;
4969   b->Mvctx     = PETSC_NULL;
4970 
4971   /* stuff for MatGetRow() */
4972   b->rowindices   = 0;
4973   b->rowvalues    = 0;
4974   b->getrowactive = PETSC_FALSE;
4975 
4976 #if defined(PETSC_HAVE_SPOOLES)
4977   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
4978                                      "MatGetFactor_mpiaij_spooles",
4979                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
4980 #endif
4981 #if defined(PETSC_HAVE_MUMPS)
4982   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
4983                                      "MatGetFactor_mpiaij_mumps",
4984                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
4985 #endif
4986 #if defined(PETSC_HAVE_PASTIX)
4987   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
4988 					   "MatGetFactor_mpiaij_pastix",
4989 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
4990 #endif
4991 #if defined(PETSC_HAVE_SUPERLU_DIST)
4992   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
4993                                      "MatGetFactor_mpiaij_superlu_dist",
4994                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
4995 #endif
4996   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4997                                      "MatStoreValues_MPIAIJ",
4998                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4999   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5000                                      "MatRetrieveValues_MPIAIJ",
5001                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5002   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5003 				     "MatGetDiagonalBlock_MPIAIJ",
5004                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5005   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5006 				     "MatIsTranspose_MPIAIJ",
5007 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5008   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5009 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5010 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5011   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5012 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5013 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5014   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5015 				     "MatDiagonalScaleLocal_MPIAIJ",
5016 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5017   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5018                                      "MatConvert_MPIAIJ_MPICSRPERM",
5019                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5020   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5021                                      "MatConvert_MPIAIJ_MPICRL",
5022                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5023   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5024                                      "MatMatMult_MPIDense_MPIAIJ",
5025                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5026   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5027                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5028                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5029   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5030                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5031                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5032   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5033   PetscFunctionReturn(0);
5034 }
5035 EXTERN_C_END
5036 
5037 #undef __FUNCT__
5038 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5039 /*@
5040      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5041          and "off-diagonal" part of the matrix in CSR format.
5042 
5043    Collective on MPI_Comm
5044 
5045    Input Parameters:
5046 +  comm - MPI communicator
5047 .  m - number of local rows (Cannot be PETSC_DECIDE)
5048 .  n - number of local columns; this should be the same as the local size used in creating the
5049        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5050        calculated if N is given). For square matrices n is almost always m.
5051 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5052 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5053 .   i - row indices for "diagonal" portion of matrix
5054 .   j - column indices
5055 .   a - matrix values
5056 .   oi - row indices for "off-diagonal" portion of matrix
5057 .   oj - column indices
5058 -   oa - matrix values
5059 
5060    Output Parameter:
5061 .   mat - the matrix
5062 
5063    Level: advanced
5064 
5065    Notes:
5066        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5067 
5068        The i and j indices are 0 based
5069 
5070        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5071 
5072 
5073 .keywords: matrix, aij, compressed row, sparse, parallel
5074 
5075 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5076           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5077 @*/
5078 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5079 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5080 {
5081   PetscErrorCode ierr;
5082   Mat_MPIAIJ     *maij;
5083 
5084   PetscFunctionBegin;
5085   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5086   if (i[0]) {
5087     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5088   }
5089   if (oi[0]) {
5090     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5091   }
5092   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5093   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5094   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5095   maij = (Mat_MPIAIJ*) (*mat)->data;
5096   maij->donotstash     = PETSC_TRUE;
5097   (*mat)->preallocated = PETSC_TRUE;
5098 
5099   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5100   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5101   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5102   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5103 
5104   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5105   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5106 
5107   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5108   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5109   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5110   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5111 
5112   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5113   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5114   PetscFunctionReturn(0);
5115 }
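/*
   For example (a sketch), the 2 x 2 matrix [1 2; 3 4] split over two processes with
   one row each (m = n = 1, M = N = 2).  The j[] indices are local to the diagonal
   block, while the oj[] indices are GLOBAL column indices:

     process 0:  i  = {0,1}, j  = {0}, a  = {1.0}       diagonal block
                 oi = {0,1}, oj = {1}, oa = {2.0}       off-diagonal block
     process 1:  i  = {0,1}, j  = {0}, a  = {4.0}
                 oi = {0,1}, oj = {0}, oa = {3.0}

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/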
5116 
5117 /*
5118     Special version for direct calls from Fortran
5119 */
5120 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5121 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5122 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5123 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5124 #endif
5125 
5126 /* Change these macros so can be used in void function */
5127 #undef CHKERRQ
5128 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5129 #undef SETERRQ2
5130 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5131 #undef SETERRQ
5132 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5133 
5134 EXTERN_C_BEGIN
5135 #undef __FUNCT__
5136 #define __FUNCT__ "matsetvaluesmpiaij_"
5137 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5138 {
5139   Mat             mat = *mmat;
5140   PetscInt        m = *mm, n = *mn;
5141   InsertMode      addv = *maddv;
5142   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5143   PetscScalar     value;
5144   PetscErrorCode  ierr;
5145 
5146   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5147   if (mat->insertmode == NOT_SET_VALUES) {
5148     mat->insertmode = addv;
5149   }
5150 #if defined(PETSC_USE_DEBUG)
5151   else if (mat->insertmode != addv) {
5152     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5153   }
5154 #endif
5155   {
5156   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5157   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5158   PetscTruth      roworiented = aij->roworiented;
5159 
5160   /* Some Variables required in the macro */
5161   Mat             A = aij->A;
5162   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5163   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5164   MatScalar       *aa = a->a;
5165   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5166   Mat             B = aij->B;
5167   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5168   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5169   MatScalar       *ba = b->a;
5170 
5171   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5172   PetscInt        nonew = a->nonew;
5173   MatScalar       *ap1,*ap2;
5174 
5175   PetscFunctionBegin;
5176   for (i=0; i<m; i++) {
5177     if (im[i] < 0) continue;
5178 #if defined(PETSC_USE_DEBUG)
5179     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5180 #endif
5181     if (im[i] >= rstart && im[i] < rend) {
5182       row      = im[i] - rstart;
5183       lastcol1 = -1;
5184       rp1      = aj + ai[row];
5185       ap1      = aa + ai[row];
5186       rmax1    = aimax[row];
5187       nrow1    = ailen[row];
5188       low1     = 0;
5189       high1    = nrow1;
5190       lastcol2 = -1;
5191       rp2      = bj + bi[row];
5192       ap2      = ba + bi[row];
5193       rmax2    = bimax[row];
5194       nrow2    = bilen[row];
5195       low2     = 0;
5196       high2    = nrow2;
5197 
5198       for (j=0; j<n; j++) {
5199         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5200         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5201         if (in[j] >= cstart && in[j] < cend){
5202           col = in[j] - cstart;
5203           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5204         } else if (in[j] < 0) continue;
5205 #if defined(PETSC_USE_DEBUG)
5206         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5207 #endif
5208         else {
5209           if (mat->was_assembled) {
5210             if (!aij->colmap) {
5211               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5212             }
5213 #if defined (PETSC_USE_CTABLE)
5214             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5215 	    col--;
5216 #else
5217             col = aij->colmap[in[j]] - 1;
5218 #endif
5219             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5220               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5221               col =  in[j];
5222               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5223               B = aij->B;
5224               b = (Mat_SeqAIJ*)B->data;
5225               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5226               rp2      = bj + bi[row];
5227               ap2      = ba + bi[row];
5228               rmax2    = bimax[row];
5229               nrow2    = bilen[row];
5230               low2     = 0;
5231               high2    = nrow2;
5232               bm       = aij->B->rmap->n;
5233               ba = b->a;
5234             }
5235           } else col = in[j];
5236           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5237         }
5238       }
5239     } else {
5240       if (!aij->donotstash) {
5241         if (roworiented) {
5242           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5243         } else {
5244            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5245         }
5246       }
5247     }
5248   }}
5249   PetscFunctionReturnVoid();
5250 }
5251 EXTERN_C_END
5252 
5253