xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 13980483147cdb1d36b57c63ba7afca9e238fadf)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "../src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine number diagonal and off-diagonal counts */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine number diagonal and off-diagonal counts */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0*/
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it it is not scalable (each processor
181 has an order N integer array but is fast to acess.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some Variables required in the macro */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
415   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are either in INSERTMODE or ADDMODE */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled, if so we must
517      also disassemble ourselfs, in order that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of number of messages and max length*/
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal allocating
674      new memory, thus calling lots of mallocs and slowing things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts: note this assumes the values are not actually */
764     /* added in yy until the next line, */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* values actually were received in the Begin() but we need to call this nop */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap->N;
932     header[2] = mat->cmap->N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap->n;
938     range = mat->rmap->range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap->n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap->n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
968   nzmax = nz; /* )th processor needs space a largest processor needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap->n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap->n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap->N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 512 rows, use binary format instead");
1114 
1115     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1116     if (!rank) {
1117       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1118     } else {
1119       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1120     }
1121     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1122     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1123     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1124     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1125 
1126     /* copy over the A part */
1127     Aloc = (Mat_SeqAIJ*)aij->A->data;
1128     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1129     row = mat->rmap->rstart;
1130     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1131     for (i=0; i<m; i++) {
1132       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1133       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1134     }
1135     aj = Aloc->j;
1136     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1137 
1138     /* copy over the B part */
1139     Aloc = (Mat_SeqAIJ*)aij->B->data;
1140     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1141     row  = mat->rmap->rstart;
1142     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1143     ct   = cols;
1144     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1148     }
1149     ierr = PetscFree(ct);CHKERRQ(ierr);
1150     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1151     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1152     /*
1153        Everyone has to call to draw the matrix since the graphics waits are
1154        synchronized across all processors that share the PetscDraw object
1155     */
1156     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1157     if (!rank) {
1158       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1159       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1160     }
1161     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1162     ierr = MatDestroy(A);CHKERRQ(ierr);
1163   }
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatView_MPIAIJ"
1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1170 {
1171   PetscErrorCode ierr;
1172   PetscTruth     iascii,isdraw,issocket,isbinary;
1173 
1174   PetscFunctionBegin;
1175   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1176   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1177   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1178   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1179   if (iascii || isdraw || isbinary || issocket) {
1180     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1181   } else {
1182     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatRelax_MPIAIJ"
1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1190 {
1191   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1192   PetscErrorCode ierr;
1193   Vec            bb1;
1194 
1195   PetscFunctionBegin;
1196   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1197 
1198   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1199     if (flag & SOR_ZERO_INITIAL_GUESS) {
1200       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1201       its--;
1202     }
1203 
1204     while (its--) {
1205       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1207 
1208       /* update rhs: bb1 = bb - B*x */
1209       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1210       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1211 
1212       /* local sweep */
1213       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1214     }
1215   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1216     if (flag & SOR_ZERO_INITIAL_GUESS) {
1217       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1218       its--;
1219     }
1220     while (its--) {
1221       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1222       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223 
1224       /* update rhs: bb1 = bb - B*x */
1225       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1226       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1227 
1228       /* local sweep */
1229       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1230     }
1231   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1232     if (flag & SOR_ZERO_INITIAL_GUESS) {
1233       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1234       its--;
1235     }
1236     while (its--) {
1237       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239 
1240       /* update rhs: bb1 = bb - B*x */
1241       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1242       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1243 
1244       /* local sweep */
1245       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1246     }
1247   } else {
1248     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1249   }
1250 
1251   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatPermute_MPIAIJ"
1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1258 {
1259   MPI_Comm       comm,pcomm;
1260   PetscInt       first,local_size,nrows;
1261   const PetscInt *rows;
1262   int            ntids;
1263   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1264   PetscErrorCode ierr;
1265 
1266   PetscFunctionBegin;
1267   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1268   /* make a collective version of 'rowp' */
1269   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1270   if (pcomm==comm) {
1271     crowp = rowp;
1272   } else {
1273     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1274     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1275     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1276     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1277   }
1278   /* collect the global row permutation and invert it */
1279   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1280   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1281   if (pcomm!=comm) {
1282     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1283   }
1284   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1285   /* get the local target indices */
1286   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1287   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1288   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1289   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1290   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1291   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1292   /* the column permutation is so much easier;
1293      make a local version of 'colp' and invert it */
1294   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1295   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1296   if (ntids==1) {
1297     lcolp = colp;
1298   } else {
1299     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1300     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1301     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1302   }
1303   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1304   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1305   if (ntids>1) {
1306     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1307     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1308   }
1309   /* now we just get the submatrix */
1310   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1311   /* clean up */
1312   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1313   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1319 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1320 {
1321   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1322   Mat            A = mat->A,B = mat->B;
1323   PetscErrorCode ierr;
1324   PetscReal      isend[5],irecv[5];
1325 
1326   PetscFunctionBegin;
1327   info->block_size     = 1.0;
1328   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1329   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1330   isend[3] = info->memory;  isend[4] = info->mallocs;
1331   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1332   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1333   isend[3] += info->memory;  isend[4] += info->mallocs;
1334   if (flag == MAT_LOCAL) {
1335     info->nz_used      = isend[0];
1336     info->nz_allocated = isend[1];
1337     info->nz_unneeded  = isend[2];
1338     info->memory       = isend[3];
1339     info->mallocs      = isend[4];
1340   } else if (flag == MAT_GLOBAL_MAX) {
1341     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1342     info->nz_used      = irecv[0];
1343     info->nz_allocated = irecv[1];
1344     info->nz_unneeded  = irecv[2];
1345     info->memory       = irecv[3];
1346     info->mallocs      = irecv[4];
1347   } else if (flag == MAT_GLOBAL_SUM) {
1348     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1349     info->nz_used      = irecv[0];
1350     info->nz_allocated = irecv[1];
1351     info->nz_unneeded  = irecv[2];
1352     info->memory       = irecv[3];
1353     info->mallocs      = irecv[4];
1354   }
1355   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1356   info->fill_ratio_needed = 0;
1357   info->factor_mallocs    = 0;
1358 
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 #undef __FUNCT__
1363 #define __FUNCT__ "MatSetOption_MPIAIJ"
1364 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1365 {
1366   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1367   PetscErrorCode ierr;
1368 
1369   PetscFunctionBegin;
1370   switch (op) {
1371   case MAT_NEW_NONZERO_LOCATIONS:
1372   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1373   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1374   case MAT_KEEP_ZEROED_ROWS:
1375   case MAT_NEW_NONZERO_LOCATION_ERR:
1376   case MAT_USE_INODES:
1377   case MAT_IGNORE_ZERO_ENTRIES:
1378     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1379     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1380     break;
1381   case MAT_ROW_ORIENTED:
1382     a->roworiented = flg;
1383     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1384     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1385     break;
1386   case MAT_NEW_DIAGONALS:
1387     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1388     break;
1389   case MAT_IGNORE_OFF_PROC_ENTRIES:
1390     a->donotstash = PETSC_TRUE;
1391     break;
1392   case MAT_SYMMETRIC:
1393     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1394     break;
1395   case MAT_STRUCTURALLY_SYMMETRIC:
1396   case MAT_HERMITIAN:
1397   case MAT_SYMMETRY_ETERNAL:
1398     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1399     break;
1400   default:
1401     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatGetRow_MPIAIJ"
1408 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1409 {
1410   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1411   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1412   PetscErrorCode ierr;
1413   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1414   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1415   PetscInt       *cmap,*idx_p;
1416 
1417   PetscFunctionBegin;
1418   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1419   mat->getrowactive = PETSC_TRUE;
1420 
1421   if (!mat->rowvalues && (idx || v)) {
1422     /*
1423         allocate enough space to hold information from the longest row.
1424     */
1425     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1426     PetscInt     max = 1,tmp;
1427     for (i=0; i<matin->rmap->n; i++) {
1428       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1429       if (max < tmp) { max = tmp; }
1430     }
1431     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1432     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1433   }
1434 
1435   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1436   lrow = row - rstart;
1437 
1438   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1439   if (!v)   {pvA = 0; pvB = 0;}
1440   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1441   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1442   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1443   nztot = nzA + nzB;
1444 
1445   cmap  = mat->garray;
1446   if (v  || idx) {
1447     if (nztot) {
1448       /* Sort by increasing column numbers, assuming A and B already sorted */
1449       PetscInt imark = -1;
1450       if (v) {
1451         *v = v_p = mat->rowvalues;
1452         for (i=0; i<nzB; i++) {
1453           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1454           else break;
1455         }
1456         imark = i;
1457         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1458         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1459       }
1460       if (idx) {
1461         *idx = idx_p = mat->rowindices;
1462         if (imark > -1) {
1463           for (i=0; i<imark; i++) {
1464             idx_p[i] = cmap[cworkB[i]];
1465           }
1466         } else {
1467           for (i=0; i<nzB; i++) {
1468             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1469             else break;
1470           }
1471           imark = i;
1472         }
1473         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1474         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1475       }
1476     } else {
1477       if (idx) *idx = 0;
1478       if (v)   *v   = 0;
1479     }
1480   }
1481   *nz = nztot;
1482   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1483   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1489 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1490 {
1491   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1492 
1493   PetscFunctionBegin;
1494   if (!aij->getrowactive) {
1495     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1496   }
1497   aij->getrowactive = PETSC_FALSE;
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatNorm_MPIAIJ"
1503 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1504 {
1505   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1506   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1507   PetscErrorCode ierr;
1508   PetscInt       i,j,cstart = mat->cmap->rstart;
1509   PetscReal      sum = 0.0;
1510   MatScalar      *v;
1511 
1512   PetscFunctionBegin;
1513   if (aij->size == 1) {
1514     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1515   } else {
1516     if (type == NORM_FROBENIUS) {
1517       v = amat->a;
1518       for (i=0; i<amat->nz; i++) {
1519 #if defined(PETSC_USE_COMPLEX)
1520         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1521 #else
1522         sum += (*v)*(*v); v++;
1523 #endif
1524       }
1525       v = bmat->a;
1526       for (i=0; i<bmat->nz; i++) {
1527 #if defined(PETSC_USE_COMPLEX)
1528         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1529 #else
1530         sum += (*v)*(*v); v++;
1531 #endif
1532       }
1533       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1534       *norm = sqrt(*norm);
1535     } else if (type == NORM_1) { /* max column norm */
1536       PetscReal *tmp,*tmp2;
1537       PetscInt  *jj,*garray = aij->garray;
1538       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1539       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1540       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1541       *norm = 0.0;
1542       v = amat->a; jj = amat->j;
1543       for (j=0; j<amat->nz; j++) {
1544         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1545       }
1546       v = bmat->a; jj = bmat->j;
1547       for (j=0; j<bmat->nz; j++) {
1548         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1549       }
1550       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1551       for (j=0; j<mat->cmap->N; j++) {
1552         if (tmp2[j] > *norm) *norm = tmp2[j];
1553       }
1554       ierr = PetscFree(tmp);CHKERRQ(ierr);
1555       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1556     } else if (type == NORM_INFINITY) { /* max row norm */
1557       PetscReal ntemp = 0.0;
1558       for (j=0; j<aij->A->rmap->n; j++) {
1559         v = amat->a + amat->i[j];
1560         sum = 0.0;
1561         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1562           sum += PetscAbsScalar(*v); v++;
1563         }
1564         v = bmat->a + bmat->i[j];
1565         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1566           sum += PetscAbsScalar(*v); v++;
1567         }
1568         if (sum > ntemp) ntemp = sum;
1569       }
1570       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1571     } else {
1572       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1573     }
1574   }
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatTranspose_MPIAIJ"
1580 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1581 {
1582   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1583   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1584   PetscErrorCode ierr;
1585   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1586   PetscInt       cstart=A->cmap->rstart,ncol;
1587   Mat            B;
1588   MatScalar      *array;
1589 
1590   PetscFunctionBegin;
1591   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1592 
1593   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1594   ai = Aloc->i; aj = Aloc->j;
1595   bi = Bloc->i; bj = Bloc->j;
1596   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1597     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1598     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1599     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1600     for (i=0; i<ai[ma]; i++){
1601       d_nnz[aj[i]] ++;
1602       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1603     }
1604 
1605     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1606     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1607     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1608     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1609     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1610   } else {
1611     B = *matout;
1612   }
1613 
1614   /* copy over the A part */
1615   array = Aloc->a;
1616   row = A->rmap->rstart;
1617   for (i=0; i<ma; i++) {
1618     ncol = ai[i+1]-ai[i];
1619     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1620     row++; array += ncol; aj += ncol;
1621   }
1622   aj = Aloc->j;
1623   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1624 
1625   /* copy over the B part */
1626   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1627   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1628   array = Bloc->a;
1629   row = A->rmap->rstart;
1630   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1631   cols_tmp = cols;
1632   for (i=0; i<mb; i++) {
1633     ncol = bi[i+1]-bi[i];
1634     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1635     row++; array += ncol; cols_tmp += ncol;
1636   }
1637   ierr = PetscFree(cols);CHKERRQ(ierr);
1638 
1639   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1640   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1641   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1642     *matout = B;
1643   } else {
1644     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1645   }
1646   PetscFunctionReturn(0);
1647 }
1648 
1649 #undef __FUNCT__
1650 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1651 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1652 {
1653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1654   Mat            a = aij->A,b = aij->B;
1655   PetscErrorCode ierr;
1656   PetscInt       s1,s2,s3;
1657 
1658   PetscFunctionBegin;
1659   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1660   if (rr) {
1661     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1662     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1663     /* Overlap communication with computation. */
1664     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1665   }
1666   if (ll) {
1667     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1668     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1669     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1670   }
1671   /* scale  the diagonal block */
1672   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1673 
1674   if (rr) {
1675     /* Do a scatter end and then right scale the off-diagonal block */
1676     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1678   }
1679 
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1685 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1686 {
1687   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1688   PetscErrorCode ierr;
1689 
1690   PetscFunctionBegin;
1691   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1692   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1693   PetscFunctionReturn(0);
1694 }
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1697 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1698 {
1699   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1700   PetscErrorCode ierr;
1701 
1702   PetscFunctionBegin;
1703   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatEqual_MPIAIJ"
1709 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1710 {
1711   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1712   Mat            a,b,c,d;
1713   PetscTruth     flg;
1714   PetscErrorCode ierr;
1715 
1716   PetscFunctionBegin;
1717   a = matA->A; b = matA->B;
1718   c = matB->A; d = matB->B;
1719 
1720   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1721   if (flg) {
1722     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1723   }
1724   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 #undef __FUNCT__
1729 #define __FUNCT__ "MatCopy_MPIAIJ"
1730 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1731 {
1732   PetscErrorCode ierr;
1733   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1734   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1735 
1736   PetscFunctionBegin;
1737   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1738   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1739     /* because of the column compression in the off-processor part of the matrix a->B,
1740        the number of columns in a->B and b->B may be different, hence we cannot call
1741        the MatCopy() directly on the two parts. If need be, we can provide a more
1742        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1743        then copying the submatrices */
1744     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1745   } else {
1746     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1747     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1748   }
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 #undef __FUNCT__
1753 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1754 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1755 {
1756   PetscErrorCode ierr;
1757 
1758   PetscFunctionBegin;
1759   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 #include "petscblaslapack.h"
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatAXPY_MPIAIJ"
1766 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1767 {
1768   PetscErrorCode ierr;
1769   PetscInt       i;
1770   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1771   PetscBLASInt   bnz,one=1;
1772   Mat_SeqAIJ     *x,*y;
1773 
1774   PetscFunctionBegin;
1775   if (str == SAME_NONZERO_PATTERN) {
1776     PetscScalar alpha = a;
1777     x = (Mat_SeqAIJ *)xx->A->data;
1778     y = (Mat_SeqAIJ *)yy->A->data;
1779     bnz = PetscBLASIntCast(x->nz);
1780     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1781     x = (Mat_SeqAIJ *)xx->B->data;
1782     y = (Mat_SeqAIJ *)yy->B->data;
1783     bnz = PetscBLASIntCast(x->nz);
1784     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1785   } else if (str == SUBSET_NONZERO_PATTERN) {
1786     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1787 
1788     x = (Mat_SeqAIJ *)xx->B->data;
1789     y = (Mat_SeqAIJ *)yy->B->data;
1790     if (y->xtoy && y->XtoY != xx->B) {
1791       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1792       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1793     }
1794     if (!y->xtoy) { /* get xtoy */
1795       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1796       y->XtoY = xx->B;
1797       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1798     }
1799     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1800   } else {
1801     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1802   }
1803   PetscFunctionReturn(0);
1804 }
1805 
1806 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatConjugate_MPIAIJ"
1810 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1811 {
1812 #if defined(PETSC_USE_COMPLEX)
1813   PetscErrorCode ierr;
1814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1815 
1816   PetscFunctionBegin;
1817   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1818   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1819 #else
1820   PetscFunctionBegin;
1821 #endif
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatRealPart_MPIAIJ"
1827 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1828 {
1829   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1830   PetscErrorCode ierr;
1831 
1832   PetscFunctionBegin;
1833   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1834   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1840 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1841 {
1842   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1843   PetscErrorCode ierr;
1844 
1845   PetscFunctionBegin;
1846   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1847   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1848   PetscFunctionReturn(0);
1849 }
1850 
1851 #ifdef PETSC_HAVE_PBGL
1852 
1853 #include <boost/parallel/mpi/bsp_process_group.hpp>
1854 #include <boost/graph/distributed/ilu_default_graph.hpp>
1855 #include <boost/graph/distributed/ilu_0_block.hpp>
1856 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1857 #include <boost/graph/distributed/petsc/interface.hpp>
1858 #include <boost/multi_array.hpp>
1859 #include <boost/parallel/distributed_property_map->hpp>
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1863 /*
1864   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1865 */
1866 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1867 {
1868   namespace petsc = boost::distributed::petsc;
1869 
1870   namespace graph_dist = boost::graph::distributed;
1871   using boost::graph::distributed::ilu_default::process_group_type;
1872   using boost::graph::ilu_permuted;
1873 
1874   PetscTruth      row_identity, col_identity;
1875   PetscContainer  c;
1876   PetscInt        m, n, M, N;
1877   PetscErrorCode  ierr;
1878 
1879   PetscFunctionBegin;
1880   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1881   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1882   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1883   if (!row_identity || !col_identity) {
1884     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1885   }
1886 
1887   process_group_type pg;
1888   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1889   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1890   lgraph_type&   level_graph = *lgraph_p;
1891   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1892 
1893   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1894   ilu_permuted(level_graph);
1895 
1896   /* put together the new matrix */
1897   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1898   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1899   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1900   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1901   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1902   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1903   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1904 
1905   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1906   ierr = PetscContainerSetPointer(c, lgraph_p);
1907   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 #undef __FUNCT__
1912 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1913 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1914 {
1915   PetscFunctionBegin;
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatSolve_MPIAIJ"
1921 /*
1922   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1923 */
1924 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1925 {
1926   namespace graph_dist = boost::graph::distributed;
1927 
1928   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1929   lgraph_type*   lgraph_p;
1930   PetscContainer c;
1931   PetscErrorCode ierr;
1932 
1933   PetscFunctionBegin;
1934   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1935   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1936   ierr = VecCopy(b, x); CHKERRQ(ierr);
1937 
1938   PetscScalar* array_x;
1939   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1940   PetscInt sx;
1941   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1942 
1943   PetscScalar* array_b;
1944   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1945   PetscInt sb;
1946   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1947 
1948   lgraph_type&   level_graph = *lgraph_p;
1949   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1950 
1951   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1952   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1953                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1954 
1955   typedef boost::iterator_property_map<array_ref_type::iterator,
1956                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1957   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1958                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1959 
1960   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1961 
1962   PetscFunctionReturn(0);
1963 }
1964 #endif
1965 
1966 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1967   PetscInt       nzlocal,nsends,nrecvs;
1968   PetscMPIInt    *send_rank;
1969   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1970   PetscScalar    *sbuf_a,**rbuf_a;
1971   PetscErrorCode (*MatDestroy)(Mat);
1972 } Mat_Redundant;
1973 
1974 #undef __FUNCT__
1975 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1976 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1977 {
1978   PetscErrorCode       ierr;
1979   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1980   PetscInt             i;
1981 
1982   PetscFunctionBegin;
1983   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1984   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1985   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1986   for (i=0; i<redund->nrecvs; i++){
1987     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1988     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1989   }
1990   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1991   ierr = PetscFree(redund);CHKERRQ(ierr);
1992   PetscFunctionReturn(0);
1993 }
1994 
1995 #undef __FUNCT__
1996 #define __FUNCT__ "MatDestroy_MatRedundant"
1997 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1998 {
1999   PetscErrorCode  ierr;
2000   PetscContainer  container;
2001   Mat_Redundant   *redund=PETSC_NULL;
2002 
2003   PetscFunctionBegin;
2004   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2005   if (container) {
2006     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2007   } else {
2008     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2009   }
2010   A->ops->destroy = redund->MatDestroy;
2011   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2012   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2013   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 #undef __FUNCT__
2018 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2019 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2020 {
2021   PetscMPIInt    rank,size;
2022   MPI_Comm       comm=((PetscObject)mat)->comm;
2023   PetscErrorCode ierr;
2024   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2025   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2026   PetscInt       *rowrange=mat->rmap->range;
2027   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2028   Mat            A=aij->A,B=aij->B,C=*matredundant;
2029   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2030   PetscScalar    *sbuf_a;
2031   PetscInt       nzlocal=a->nz+b->nz;
2032   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2033   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2034   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2035   MatScalar      *aworkA,*aworkB;
2036   PetscScalar    *vals;
2037   PetscMPIInt    tag1,tag2,tag3,imdex;
2038   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2039                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2040   MPI_Status     recv_status,*send_status;
2041   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2042   PetscInt       **rbuf_j=PETSC_NULL;
2043   PetscScalar    **rbuf_a=PETSC_NULL;
2044   Mat_Redundant  *redund=PETSC_NULL;
2045   PetscContainer container;
2046 
2047   PetscFunctionBegin;
2048   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2049   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2050 
2051   if (reuse == MAT_REUSE_MATRIX) {
2052     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2053     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2054     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2055     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2056     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2057     if (container) {
2058       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2059     } else {
2060       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2061     }
2062     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2063 
2064     nsends    = redund->nsends;
2065     nrecvs    = redund->nrecvs;
2066     send_rank = redund->send_rank; recv_rank = send_rank + size;
2067     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2068     sbuf_j    = redund->sbuf_j;
2069     sbuf_a    = redund->sbuf_a;
2070     rbuf_j    = redund->rbuf_j;
2071     rbuf_a    = redund->rbuf_a;
2072   }
2073 
2074   if (reuse == MAT_INITIAL_MATRIX){
2075     PetscMPIInt  subrank,subsize;
2076     PetscInt     nleftover,np_subcomm;
2077     /* get the destination processors' id send_rank, nsends and nrecvs */
2078     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2079     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2080     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
2081     recv_rank = send_rank + size;
2082     np_subcomm = size/nsubcomm;
2083     nleftover  = size - nsubcomm*np_subcomm;
2084     nsends = 0; nrecvs = 0;
2085     for (i=0; i<size; i++){ /* i=rank*/
2086       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2087         send_rank[nsends] = i; nsends++;
2088         recv_rank[nrecvs++] = i;
2089       }
2090     }
2091     if (rank >= size - nleftover){/* this proc is a leftover processor */
2092       i = size-nleftover-1;
2093       j = 0;
2094       while (j < nsubcomm - nleftover){
2095         send_rank[nsends++] = i;
2096         i--; j++;
2097       }
2098     }
2099 
2100     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2101       for (i=0; i<nleftover; i++){
2102         recv_rank[nrecvs++] = size-nleftover+i;
2103       }
2104     }
2105 
2106     /* allocate sbuf_j, sbuf_a */
2107     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2108     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2109     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2110   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2111 
2112   /* copy mat's local entries into the buffers */
2113   if (reuse == MAT_INITIAL_MATRIX){
2114     rownz_max = 0;
2115     rptr = sbuf_j;
2116     cols = sbuf_j + rend-rstart + 1;
2117     vals = sbuf_a;
2118     rptr[0] = 0;
2119     for (i=0; i<rend-rstart; i++){
2120       row = i + rstart;
2121       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2122       ncols  = nzA + nzB;
2123       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2124       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2125       /* load the column indices for this row into cols */
2126       lwrite = 0;
2127       for (l=0; l<nzB; l++) {
2128         if ((ctmp = bmap[cworkB[l]]) < cstart){
2129           vals[lwrite]   = aworkB[l];
2130           cols[lwrite++] = ctmp;
2131         }
2132       }
2133       for (l=0; l<nzA; l++){
2134         vals[lwrite]   = aworkA[l];
2135         cols[lwrite++] = cstart + cworkA[l];
2136       }
2137       for (l=0; l<nzB; l++) {
2138         if ((ctmp = bmap[cworkB[l]]) >= cend){
2139           vals[lwrite]   = aworkB[l];
2140           cols[lwrite++] = ctmp;
2141         }
2142       }
2143       vals += ncols;
2144       cols += ncols;
2145       rptr[i+1] = rptr[i] + ncols;
2146       if (rownz_max < ncols) rownz_max = ncols;
2147     }
2148     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2149   } else { /* only copy matrix values into sbuf_a */
2150     rptr = sbuf_j;
2151     vals = sbuf_a;
2152     rptr[0] = 0;
2153     for (i=0; i<rend-rstart; i++){
2154       row = i + rstart;
2155       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2156       ncols  = nzA + nzB;
2157       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2158       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2159       lwrite = 0;
2160       for (l=0; l<nzB; l++) {
2161         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2162       }
2163       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2164       for (l=0; l<nzB; l++) {
2165         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2166       }
2167       vals += ncols;
2168       rptr[i+1] = rptr[i] + ncols;
2169     }
2170   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2171 
2172   /* send nzlocal to others, and recv other's nzlocal */
2173   /*--------------------------------------------------*/
2174   if (reuse == MAT_INITIAL_MATRIX){
2175     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2176     s_waits2 = s_waits3 + nsends;
2177     s_waits1 = s_waits2 + nsends;
2178     r_waits1 = s_waits1 + nsends;
2179     r_waits2 = r_waits1 + nrecvs;
2180     r_waits3 = r_waits2 + nrecvs;
2181   } else {
2182     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2183     r_waits3 = s_waits3 + nsends;
2184   }
2185 
2186   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2187   if (reuse == MAT_INITIAL_MATRIX){
2188     /* get new tags to keep the communication clean */
2189     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2190     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2191     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2192     rbuf_nz = sbuf_nz + nsends;
2193 
2194     /* post receives of other's nzlocal */
2195     for (i=0; i<nrecvs; i++){
2196       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2197     }
2198     /* send nzlocal to others */
2199     for (i=0; i<nsends; i++){
2200       sbuf_nz[i] = nzlocal;
2201       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2202     }
2203     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2204     count = nrecvs;
2205     while (count) {
2206       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2207       recv_rank[imdex] = recv_status.MPI_SOURCE;
2208       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2209       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2210 
2211       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2212       rbuf_nz[imdex] += i + 2;
2213       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2214       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2215       count--;
2216     }
2217     /* wait on sends of nzlocal */
2218     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2219     /* send mat->i,j to others, and recv from other's */
2220     /*------------------------------------------------*/
2221     for (i=0; i<nsends; i++){
2222       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2223       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2224     }
2225     /* wait on receives of mat->i,j */
2226     /*------------------------------*/
2227     count = nrecvs;
2228     while (count) {
2229       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2230       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2231       count--;
2232     }
2233     /* wait on sends of mat->i,j */
2234     /*---------------------------*/
2235     if (nsends) {
2236       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2237     }
2238   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2239 
2240   /* post receives, send and receive mat->a */
2241   /*----------------------------------------*/
2242   for (imdex=0; imdex<nrecvs; imdex++) {
2243     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2244   }
2245   for (i=0; i<nsends; i++){
2246     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2247   }
2248   count = nrecvs;
2249   while (count) {
2250     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2251     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2252     count--;
2253   }
2254   if (nsends) {
2255     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2256   }
2257 
2258   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2259 
2260   /* create redundant matrix */
2261   /*-------------------------*/
2262   if (reuse == MAT_INITIAL_MATRIX){
2263     /* compute rownz_max for preallocation */
2264     for (imdex=0; imdex<nrecvs; imdex++){
2265       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2266       rptr = rbuf_j[imdex];
2267       for (i=0; i<j; i++){
2268         ncols = rptr[i+1] - rptr[i];
2269         if (rownz_max < ncols) rownz_max = ncols;
2270       }
2271     }
2272 
2273     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2274     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2275     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2276     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2277     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2278   } else {
2279     C = *matredundant;
2280   }
2281 
2282   /* insert local matrix entries */
2283   rptr = sbuf_j;
2284   cols = sbuf_j + rend-rstart + 1;
2285   vals = sbuf_a;
2286   for (i=0; i<rend-rstart; i++){
2287     row   = i + rstart;
2288     ncols = rptr[i+1] - rptr[i];
2289     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2290     vals += ncols;
2291     cols += ncols;
2292   }
2293   /* insert received matrix entries */
2294   for (imdex=0; imdex<nrecvs; imdex++){
2295     rstart = rowrange[recv_rank[imdex]];
2296     rend   = rowrange[recv_rank[imdex]+1];
2297     rptr = rbuf_j[imdex];
2298     cols = rbuf_j[imdex] + rend-rstart + 1;
2299     vals = rbuf_a[imdex];
2300     for (i=0; i<rend-rstart; i++){
2301       row   = i + rstart;
2302       ncols = rptr[i+1] - rptr[i];
2303       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2304       vals += ncols;
2305       cols += ncols;
2306     }
2307   }
2308   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2309   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2310   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2311   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2312   if (reuse == MAT_INITIAL_MATRIX){
2313     PetscContainer container;
2314     *matredundant = C;
2315     /* create a supporting struct and attach it to C for reuse */
2316     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2317     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2318     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2319     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2320     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2321 
2322     redund->nzlocal = nzlocal;
2323     redund->nsends  = nsends;
2324     redund->nrecvs  = nrecvs;
2325     redund->send_rank = send_rank;
2326     redund->sbuf_nz = sbuf_nz;
2327     redund->sbuf_j  = sbuf_j;
2328     redund->sbuf_a  = sbuf_a;
2329     redund->rbuf_j  = rbuf_j;
2330     redund->rbuf_a  = rbuf_a;
2331 
2332     redund->MatDestroy = C->ops->destroy;
2333     C->ops->destroy    = MatDestroy_MatRedundant;
2334   }
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 #undef __FUNCT__
2339 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2340 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2341 {
2342   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2343   PetscErrorCode ierr;
2344   PetscInt       i,*idxb = 0;
2345   PetscScalar    *va,*vb;
2346   Vec            vtmp;
2347 
2348   PetscFunctionBegin;
2349   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2350   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2351   if (idx) {
2352     for (i=0; i<A->rmap->n; i++) {
2353       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2354     }
2355   }
2356 
2357   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2358   if (idx) {
2359     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2360   }
2361   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2362   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2363 
2364   for (i=0; i<A->rmap->n; i++){
2365     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2366       va[i] = vb[i];
2367       if (idx) idx[i] = a->garray[idxb[i]];
2368     }
2369   }
2370 
2371   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2372   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2373   if (idxb) {
2374     ierr = PetscFree(idxb);CHKERRQ(ierr);
2375   }
2376   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 #undef __FUNCT__
2381 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2382 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2383 {
2384   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2385   PetscErrorCode ierr;
2386   PetscInt       i,*idxb = 0;
2387   PetscScalar    *va,*vb;
2388   Vec            vtmp;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2392   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2393   if (idx) {
2394     for (i=0; i<A->cmap->n; i++) {
2395       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2396     }
2397   }
2398 
2399   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2400   if (idx) {
2401     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2402   }
2403   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2404   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2405 
2406   for (i=0; i<A->rmap->n; i++){
2407     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2408       va[i] = vb[i];
2409       if (idx) idx[i] = a->garray[idxb[i]];
2410     }
2411   }
2412 
2413   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2414   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2415   if (idxb) {
2416     ierr = PetscFree(idxb);CHKERRQ(ierr);
2417   }
2418   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 #undef __FUNCT__
2423 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2424 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2425 {
2426   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2427   PetscInt       n      = A->rmap->n;
2428   PetscInt       cstart = A->cmap->rstart;
2429   PetscInt      *cmap   = mat->garray;
2430   PetscInt      *diagIdx, *offdiagIdx;
2431   Vec            diagV, offdiagV;
2432   PetscScalar   *a, *diagA, *offdiagA;
2433   PetscInt       r;
2434   PetscErrorCode ierr;
2435 
2436   PetscFunctionBegin;
2437   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2438   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2439   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2440   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2441   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2442   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2443   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2444   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2445   for(r = 0; r < n; ++r) {
2446     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2447       a[r]   = diagA[r];
2448       idx[r] = cstart + diagIdx[r];
2449     } else {
2450       a[r]   = offdiagA[r];
2451       idx[r] = cmap[offdiagIdx[r]];
2452     }
2453   }
2454   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2455   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2456   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2457   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2458   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2459   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2460   PetscFunctionReturn(0);
2461 }
2462 
2463 #undef __FUNCT__
2464 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2465 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2466 {
2467   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2468   PetscInt       n      = A->rmap->n;
2469   PetscInt       cstart = A->cmap->rstart;
2470   PetscInt      *cmap   = mat->garray;
2471   PetscInt      *diagIdx, *offdiagIdx;
2472   Vec            diagV, offdiagV;
2473   PetscScalar   *a, *diagA, *offdiagA;
2474   PetscInt       r;
2475   PetscErrorCode ierr;
2476 
2477   PetscFunctionBegin;
2478   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2479   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2480   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2481   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2482   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2483   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2484   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2485   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2486   for(r = 0; r < n; ++r) {
2487     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2488       a[r]   = diagA[r];
2489       idx[r] = cstart + diagIdx[r];
2490     } else {
2491       a[r]   = offdiagA[r];
2492       idx[r] = cmap[offdiagIdx[r]];
2493     }
2494   }
2495   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2496   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2497   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2498   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2499   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2500   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2506 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2507 {
2508   PetscErrorCode ierr;
2509 
2510   PetscFunctionBegin;
2511   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2512   PetscFunctionReturn(0);
2513 }
2514 
2515 /* -------------------------------------------------------------------*/
2516 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2517        MatGetRow_MPIAIJ,
2518        MatRestoreRow_MPIAIJ,
2519        MatMult_MPIAIJ,
2520 /* 4*/ MatMultAdd_MPIAIJ,
2521        MatMultTranspose_MPIAIJ,
2522        MatMultTransposeAdd_MPIAIJ,
2523 #ifdef PETSC_HAVE_PBGL
2524        MatSolve_MPIAIJ,
2525 #else
2526        0,
2527 #endif
2528        0,
2529        0,
2530 /*10*/ 0,
2531        0,
2532        0,
2533        MatRelax_MPIAIJ,
2534        MatTranspose_MPIAIJ,
2535 /*15*/ MatGetInfo_MPIAIJ,
2536        MatEqual_MPIAIJ,
2537        MatGetDiagonal_MPIAIJ,
2538        MatDiagonalScale_MPIAIJ,
2539        MatNorm_MPIAIJ,
2540 /*20*/ MatAssemblyBegin_MPIAIJ,
2541        MatAssemblyEnd_MPIAIJ,
2542        0,
2543        MatSetOption_MPIAIJ,
2544        MatZeroEntries_MPIAIJ,
2545 /*25*/ MatZeroRows_MPIAIJ,
2546        0,
2547 #ifdef PETSC_HAVE_PBGL
2548        0,
2549 #else
2550        0,
2551 #endif
2552        0,
2553        0,
2554 /*30*/ MatSetUpPreallocation_MPIAIJ,
2555 #ifdef PETSC_HAVE_PBGL
2556        0,
2557 #else
2558        0,
2559 #endif
2560        0,
2561        0,
2562        0,
2563 /*35*/ MatDuplicate_MPIAIJ,
2564        0,
2565        0,
2566        0,
2567        0,
2568 /*40*/ MatAXPY_MPIAIJ,
2569        MatGetSubMatrices_MPIAIJ,
2570        MatIncreaseOverlap_MPIAIJ,
2571        MatGetValues_MPIAIJ,
2572        MatCopy_MPIAIJ,
2573 /*45*/ MatGetRowMax_MPIAIJ,
2574        MatScale_MPIAIJ,
2575        0,
2576        0,
2577        0,
2578 /*50*/ MatSetBlockSize_MPIAIJ,
2579        0,
2580        0,
2581        0,
2582        0,
2583 /*55*/ MatFDColoringCreate_MPIAIJ,
2584        0,
2585        MatSetUnfactored_MPIAIJ,
2586        MatPermute_MPIAIJ,
2587        0,
2588 /*60*/ MatGetSubMatrix_MPIAIJ,
2589        MatDestroy_MPIAIJ,
2590        MatView_MPIAIJ,
2591        0,
2592        0,
2593 /*65*/ 0,
2594        0,
2595        0,
2596        0,
2597        0,
2598 /*70*/ MatGetRowMaxAbs_MPIAIJ,
2599        MatGetRowMinAbs_MPIAIJ,
2600        0,
2601        MatSetColoring_MPIAIJ,
2602 #if defined(PETSC_HAVE_ADIC)
2603        MatSetValuesAdic_MPIAIJ,
2604 #else
2605        0,
2606 #endif
2607        MatSetValuesAdifor_MPIAIJ,
2608 /*75*/ 0,
2609        0,
2610        0,
2611        0,
2612        0,
2613 /*80*/ 0,
2614        0,
2615        0,
2616 /*84*/ MatLoad_MPIAIJ,
2617        0,
2618        0,
2619        0,
2620        0,
2621        0,
2622 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2623        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2624        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2625        MatPtAP_Basic,
2626        MatPtAPSymbolic_MPIAIJ,
2627 /*95*/ MatPtAPNumeric_MPIAIJ,
2628        0,
2629        0,
2630        0,
2631        0,
2632 /*100*/0,
2633        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2634        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2635        MatConjugate_MPIAIJ,
2636        0,
2637 /*105*/MatSetValuesRow_MPIAIJ,
2638        MatRealPart_MPIAIJ,
2639        MatImaginaryPart_MPIAIJ,
2640        0,
2641        0,
2642 /*110*/0,
2643        MatGetRedundantMatrix_MPIAIJ,
2644        MatGetRowMin_MPIAIJ,
2645        0,
2646        0,
2647 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2648 
2649 /* ----------------------------------------------------------------------------------------*/
2650 
2651 EXTERN_C_BEGIN
2652 #undef __FUNCT__
2653 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2654 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2655 {
2656   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2657   PetscErrorCode ierr;
2658 
2659   PetscFunctionBegin;
2660   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2661   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2662   PetscFunctionReturn(0);
2663 }
2664 EXTERN_C_END
2665 
2666 EXTERN_C_BEGIN
2667 #undef __FUNCT__
2668 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2669 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2670 {
2671   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2672   PetscErrorCode ierr;
2673 
2674   PetscFunctionBegin;
2675   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2676   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2677   PetscFunctionReturn(0);
2678 }
2679 EXTERN_C_END
2680 
2681 #include "petscpc.h"
2682 EXTERN_C_BEGIN
2683 #undef __FUNCT__
2684 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2685 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2686 {
2687   Mat_MPIAIJ     *b;
2688   PetscErrorCode ierr;
2689   PetscInt       i;
2690 
2691   PetscFunctionBegin;
2692   B->preallocated = PETSC_TRUE;
2693   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2694   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2695   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2696   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2697 
2698   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2699   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2700   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2701   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2702   if (d_nnz) {
2703     for (i=0; i<B->rmap->n; i++) {
2704       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2705     }
2706   }
2707   if (o_nnz) {
2708     for (i=0; i<B->rmap->n; i++) {
2709       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2710     }
2711   }
2712   b = (Mat_MPIAIJ*)B->data;
2713 
2714   /* Explicitly create 2 MATSEQAIJ matrices. */
2715   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2716   ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2717   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2718   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2719   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2720   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2721   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2722   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2723 
2724   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2725   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2726 
2727   PetscFunctionReturn(0);
2728 }
2729 EXTERN_C_END
2730 
2731 #undef __FUNCT__
2732 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2733 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2734 {
2735   Mat            mat;
2736   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2737   PetscErrorCode ierr;
2738 
2739   PetscFunctionBegin;
2740   *newmat       = 0;
2741   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2742   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2743   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2744   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2745   a    = (Mat_MPIAIJ*)mat->data;
2746 
2747   mat->factor       = matin->factor;
2748   mat->rmap->bs      = matin->rmap->bs;
2749   mat->assembled    = PETSC_TRUE;
2750   mat->insertmode   = NOT_SET_VALUES;
2751   mat->preallocated = PETSC_TRUE;
2752 
2753   a->size           = oldmat->size;
2754   a->rank           = oldmat->rank;
2755   a->donotstash     = oldmat->donotstash;
2756   a->roworiented    = oldmat->roworiented;
2757   a->rowindices     = 0;
2758   a->rowvalues      = 0;
2759   a->getrowactive   = PETSC_FALSE;
2760 
2761   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2762   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2763 
2764   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2765   if (oldmat->colmap) {
2766 #if defined (PETSC_USE_CTABLE)
2767     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2768 #else
2769     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2770     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2771     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2772 #endif
2773   } else a->colmap = 0;
2774   if (oldmat->garray) {
2775     PetscInt len;
2776     len  = oldmat->B->cmap->n;
2777     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2778     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2779     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2780   } else a->garray = 0;
2781 
2782   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2783   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2784   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2785   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2786   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2787   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2788   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2789   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2790   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2791   *newmat = mat;
2792   PetscFunctionReturn(0);
2793 }
2794 
2795 #include "petscsys.h"
2796 
2797 #undef __FUNCT__
2798 #define __FUNCT__ "MatLoad_MPIAIJ"
2799 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2800 {
2801   Mat            A;
2802   PetscScalar    *vals,*svals;
2803   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2804   MPI_Status     status;
2805   PetscErrorCode ierr;
2806   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2807   PetscInt       i,nz,j,rstart,rend,mmax,maxnz;
2808   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2809   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2810   PetscInt       cend,cstart,n,*rowners;
2811   int            fd;
2812 
2813   PetscFunctionBegin;
2814   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2815   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2816   if (!rank) {
2817     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2818     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2819     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2820   }
2821 
2822   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2823   M = header[1]; N = header[2];
2824   /* determine ownership of all rows */
2825   m    = M/size + ((M % size) > rank);
2826   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2827   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2828 
2829   /* First process needs enough room for process with most rows */
2830   if (!rank) {
2831     mmax       = rowners[1];
2832     for (i=2; i<size; i++) {
2833       mmax = PetscMax(mmax,rowners[i]);
2834     }
2835   } else mmax = m;
2836 
2837   rowners[0] = 0;
2838   for (i=2; i<=size; i++) {
2839     rowners[i] += rowners[i-1];
2840   }
2841   rstart = rowners[rank];
2842   rend   = rowners[rank+1];
2843 
2844   /* distribute row lengths to all processors */
2845   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2846   if (!rank) {
2847     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2848     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2849     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2850     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2851     for (j=0; j<m; j++) {
2852       procsnz[0] += ourlens[j];
2853     }
2854     for (i=1; i<size; i++) {
2855       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2856       /* calculate the number of nonzeros on each processor */
2857       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2858         procsnz[i] += rowlengths[j];
2859       }
2860       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2861       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2862     }
2863     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2864   } else {
2865     mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr);
2866     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2867   }
2868 
2869   if (!rank) {
2870     /* determine max buffer needed and allocate it */
2871     maxnz = 0;
2872     for (i=0; i<size; i++) {
2873       maxnz = PetscMax(maxnz,procsnz[i]);
2874     }
2875     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2876 
2877     /* read in my part of the matrix column indices  */
2878     nz   = procsnz[0];
2879     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2880     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2881 
2882     /* read in every one elses and ship off */
2883     for (i=1; i<size; i++) {
2884       nz     = procsnz[i];
2885       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2886       mpicnt = PetscMPIIntCast(nz);
2887       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2888     }
2889     ierr = PetscFree(cols);CHKERRQ(ierr);
2890   } else {
2891     /* determine buffer space needed for message */
2892     nz = 0;
2893     for (i=0; i<m; i++) {
2894       nz += ourlens[i];
2895     }
2896     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2897 
2898     /* receive message of column indices*/
2899     mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr);
2900     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2901     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2902     if (status.MPI_ERROR) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() status has error code %d, expected length %d",status.MPI_ERROR,mpicnt);}
2903     else if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2904     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2905     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2906   }
2907 
2908   /* determine column ownership if matrix is not square */
2909   if (N != M) {
2910     n      = N/size + ((N % size) > rank);
2911     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2912     cstart = cend - n;
2913   } else {
2914     cstart = rstart;
2915     cend   = rend;
2916     n      = cend - cstart;
2917   }
2918 
2919   /* loop over local rows, determining number of off diagonal entries */
2920   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2921   jj = 0;
2922   for (i=0; i<m; i++) {
2923     for (j=0; j<ourlens[i]; j++) {
2924       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2925       jj++;
2926     }
2927   }
2928 
2929   /* create our matrix */
2930   for (i=0; i<m; i++) {
2931     ourlens[i] -= offlens[i];
2932   }
2933   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2934   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2935   ierr = MatSetType(A,type);CHKERRQ(ierr);
2936   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2937 
2938   for (i=0; i<m; i++) {
2939     ourlens[i] += offlens[i];
2940   }
2941 
2942   if (!rank) {
2943     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2944 
2945     /* read in my part of the matrix numerical values  */
2946     nz   = procsnz[0];
2947     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2948 
2949     /* insert into matrix */
2950     jj      = rstart;
2951     smycols = mycols;
2952     svals   = vals;
2953     for (i=0; i<m; i++) {
2954       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2955       smycols += ourlens[i];
2956       svals   += ourlens[i];
2957       jj++;
2958     }
2959 
2960     /* read in other processors and ship out */
2961     for (i=1; i<size; i++) {
2962       nz     = procsnz[i];
2963       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2964       mpicnt = PetscMPIIntCast(nz);
2965       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2966     }
2967     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2968   } else {
2969     /* receive numeric values */
2970     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2971 
2972     /* receive message of values*/
2973     mpicnt = PetscMPIIntCast(nz);
2974     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2975     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
2976     if (status.MPI_ERROR) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() status has error code %d, expected length %d",status.MPI_ERROR,mpicnt);}
2977     else if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2978     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2979     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2980 
2981     /* insert into matrix */
2982     jj      = rstart;
2983     smycols = mycols;
2984     svals   = vals;
2985     for (i=0; i<m; i++) {
2986       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2987       smycols += ourlens[i];
2988       svals   += ourlens[i];
2989       jj++;
2990     }
2991   }
2992   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2993   ierr = PetscFree(vals);CHKERRQ(ierr);
2994   ierr = PetscFree(mycols);CHKERRQ(ierr);
2995   ierr = PetscFree(rowners);CHKERRQ(ierr);
2996 
2997   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2998   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2999   *newmat = A;
3000   PetscFunctionReturn(0);
3001 }
3002 
3003 #undef __FUNCT__
3004 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3005 /*
3006     Not great since it makes two copies of the submatrix, first an SeqAIJ
3007   in local and then by concatenating the local matrices the end result.
3008   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3009 */
3010 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3011 {
3012   PetscErrorCode ierr;
3013   PetscMPIInt    rank,size;
3014   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3015   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3016   Mat            *local,M,Mreuse;
3017   MatScalar      *vwork,*aa;
3018   MPI_Comm       comm = ((PetscObject)mat)->comm;
3019   Mat_SeqAIJ     *aij;
3020 
3021 
3022   PetscFunctionBegin;
3023   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3024   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3025 
3026   if (call ==  MAT_REUSE_MATRIX) {
3027     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3028     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3029     local = &Mreuse;
3030     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3031   } else {
3032     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3033     Mreuse = *local;
3034     ierr   = PetscFree(local);CHKERRQ(ierr);
3035   }
3036 
3037   /*
3038       m - number of local rows
3039       n - number of columns (same on all processors)
3040       rstart - first row in new global matrix generated
3041   */
3042   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3043   if (call == MAT_INITIAL_MATRIX) {
3044     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3045     ii  = aij->i;
3046     jj  = aij->j;
3047 
3048     /*
3049         Determine the number of non-zeros in the diagonal and off-diagonal
3050         portions of the matrix in order to do correct preallocation
3051     */
3052 
3053     /* first get start and end of "diagonal" columns */
3054     if (csize == PETSC_DECIDE) {
3055       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3056       if (mglobal == n) { /* square matrix */
3057 	nlocal = m;
3058       } else {
3059         nlocal = n/size + ((n % size) > rank);
3060       }
3061     } else {
3062       nlocal = csize;
3063     }
3064     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3065     rstart = rend - nlocal;
3066     if (rank == size - 1 && rend != n) {
3067       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3068     }
3069 
3070     /* next, compute all the lengths */
3071     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3072     olens = dlens + m;
3073     for (i=0; i<m; i++) {
3074       jend = ii[i+1] - ii[i];
3075       olen = 0;
3076       dlen = 0;
3077       for (j=0; j<jend; j++) {
3078         if (*jj < rstart || *jj >= rend) olen++;
3079         else dlen++;
3080         jj++;
3081       }
3082       olens[i] = olen;
3083       dlens[i] = dlen;
3084     }
3085     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3086     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3087     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3088     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3089     ierr = PetscFree(dlens);CHKERRQ(ierr);
3090   } else {
3091     PetscInt ml,nl;
3092 
3093     M = *newmat;
3094     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3095     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3096     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3097     /*
3098          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3099        rather than the slower MatSetValues().
3100     */
3101     M->was_assembled = PETSC_TRUE;
3102     M->assembled     = PETSC_FALSE;
3103   }
3104   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3105   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3106   ii  = aij->i;
3107   jj  = aij->j;
3108   aa  = aij->a;
3109   for (i=0; i<m; i++) {
3110     row   = rstart + i;
3111     nz    = ii[i+1] - ii[i];
3112     cwork = jj;     jj += nz;
3113     vwork = aa;     aa += nz;
3114     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3115   }
3116 
3117   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3118   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3119   *newmat = M;
3120 
3121   /* save submatrix used in processor for next request */
3122   if (call ==  MAT_INITIAL_MATRIX) {
3123     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3124     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3125   }
3126 
3127   PetscFunctionReturn(0);
3128 }
3129 
3130 EXTERN_C_BEGIN
3131 #undef __FUNCT__
3132 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3133 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3134 {
3135   PetscInt       m,cstart, cend,j,nnz,i,d;
3136   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3137   const PetscInt *JJ;
3138   PetscScalar    *values;
3139   PetscErrorCode ierr;
3140 
3141   PetscFunctionBegin;
3142   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3143 
3144   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3145   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3146   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3147   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3148   m      = B->rmap->n;
3149   cstart = B->cmap->rstart;
3150   cend   = B->cmap->rend;
3151   rstart = B->rmap->rstart;
3152 
3153   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3154   o_nnz = d_nnz + m;
3155 
3156 #if defined(PETSC_USE_DEBUGGING)
3157   for (i=0; i<m; i++) {
3158     nnz     = Ii[i+1]- Ii[i];
3159     JJ      = J + Ii[i];
3160     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3161     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3162     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3163     for (j=1; j<nnz; j++) {
3164       if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3165     }
3166   }
3167 #endif
3168 
3169   for (i=0; i<m; i++) {
3170     nnz     = Ii[i+1]- Ii[i];
3171     JJ      = J + Ii[i];
3172     nnz_max = PetscMax(nnz_max,nnz);
3173     for (j=0; j<nnz; j++) {
3174       if (*JJ >= cstart) break;
3175       JJ++;
3176     }
3177     d = 0;
3178     for (; j<nnz; j++) {
3179       if (*JJ++ >= cend) break;
3180       d++;
3181     }
3182     d_nnz[i] = d;
3183     o_nnz[i] = nnz - d;
3184   }
3185   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3186   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3187 
3188   if (v) values = (PetscScalar*)v;
3189   else {
3190     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3191     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3192   }
3193 
3194   for (i=0; i<m; i++) {
3195     ii   = i + rstart;
3196     nnz  = Ii[i+1]- Ii[i];
3197     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3198   }
3199   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3200   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3201 
3202   if (!v) {
3203     ierr = PetscFree(values);CHKERRQ(ierr);
3204   }
3205   PetscFunctionReturn(0);
3206 }
3207 EXTERN_C_END
3208 
3209 #undef __FUNCT__
3210 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3211 /*@
3212    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3213    (the default parallel PETSc format).
3214 
3215    Collective on MPI_Comm
3216 
3217    Input Parameters:
3218 +  B - the matrix
3219 .  i - the indices into j for the start of each local row (starts with zero)
3220 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3221 -  v - optional values in the matrix
3222 
3223    Level: developer
3224 
3225    Notes:
3226        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3227      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3228      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3229 
3230        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3231 
3232        The format which is used for the sparse matrix input, is equivalent to a
3233     row-major ordering.. i.e for the following matrix, the input data expected is
3234     as shown:
3235 
3236         1 0 0
3237         2 0 3     P0
3238        -------
3239         4 5 6     P1
3240 
3241      Process0 [P0]: rows_owned=[0,1]
3242         i =  {0,1,3}  [size = nrow+1  = 2+1]
3243         j =  {0,0,2}  [size = nz = 6]
3244         v =  {1,2,3}  [size = nz = 6]
3245 
3246      Process1 [P1]: rows_owned=[2]
3247         i =  {0,3}    [size = nrow+1  = 1+1]
3248         j =  {0,1,2}  [size = nz = 6]
3249         v =  {4,5,6}  [size = nz = 6]
3250 
3251       The column indices for each row MUST be sorted.
3252 
3253 .keywords: matrix, aij, compressed row, sparse, parallel
3254 
3255 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3256           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3257 @*/
3258 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3259 {
3260   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3261 
3262   PetscFunctionBegin;
3263   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3264   if (f) {
3265     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3266   }
3267   PetscFunctionReturn(0);
3268 }
3269 
3270 #undef __FUNCT__
3271 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3272 /*@C
3273    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3274    (the default parallel PETSc format).  For good matrix assembly performance
3275    the user should preallocate the matrix storage by setting the parameters
3276    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3277    performance can be increased by more than a factor of 50.
3278 
3279    Collective on MPI_Comm
3280 
3281    Input Parameters:
3282 +  A - the matrix
3283 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3284            (same value is used for all local rows)
3285 .  d_nnz - array containing the number of nonzeros in the various rows of the
3286            DIAGONAL portion of the local submatrix (possibly different for each row)
3287            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3288            The size of this array is equal to the number of local rows, i.e 'm'.
3289            You must leave room for the diagonal entry even if it is zero.
3290 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3291            submatrix (same value is used for all local rows).
3292 -  o_nnz - array containing the number of nonzeros in the various rows of the
3293            OFF-DIAGONAL portion of the local submatrix (possibly different for
3294            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3295            structure. The size of this array is equal to the number
3296            of local rows, i.e 'm'.
3297 
3298    If the *_nnz parameter is given then the *_nz parameter is ignored
3299 
3300    The AIJ format (also called the Yale sparse matrix format or
3301    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3302    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3303 
3304    The parallel matrix is partitioned such that the first m0 rows belong to
3305    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3306    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3307 
3308    The DIAGONAL portion of the local submatrix of a processor can be defined
3309    as the submatrix which is obtained by extraction the part corresponding
3310    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3311    first row that belongs to the processor, and r2 is the last row belonging
3312    to the this processor. This is a square mxm matrix. The remaining portion
3313    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3314 
3315    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3316 
3317    You can call MatGetInfo() to get information on how effective the preallocation was;
3318    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3319    You can also run with the option -info and look for messages with the string
3320    malloc in them to see if additional memory allocation was needed.
3321 
3322    Example usage:
3323 
3324    Consider the following 8x8 matrix with 34 non-zero values, that is
3325    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3326    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3327    as follows:
3328 
3329 .vb
3330             1  2  0  |  0  3  0  |  0  4
3331     Proc0   0  5  6  |  7  0  0  |  8  0
3332             9  0 10  | 11  0  0  | 12  0
3333     -------------------------------------
3334            13  0 14  | 15 16 17  |  0  0
3335     Proc1   0 18  0  | 19 20 21  |  0  0
3336             0  0  0  | 22 23  0  | 24  0
3337     -------------------------------------
3338     Proc2  25 26 27  |  0  0 28  | 29  0
3339            30  0  0  | 31 32 33  |  0 34
3340 .ve
3341 
3342    This can be represented as a collection of submatrices as:
3343 
3344 .vb
3345       A B C
3346       D E F
3347       G H I
3348 .ve
3349 
3350    Where the submatrices A,B,C are owned by proc0, D,E,F are
3351    owned by proc1, G,H,I are owned by proc2.
3352 
3353    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3354    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3355    The 'M','N' parameters are 8,8, and have the same values on all procs.
3356 
3357    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3358    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3359    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3360    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3361    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3362    matrix, ans [DF] as another SeqAIJ matrix.
3363 
3364    When d_nz, o_nz parameters are specified, d_nz storage elements are
3365    allocated for every row of the local diagonal submatrix, and o_nz
3366    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3367    One way to choose d_nz and o_nz is to use the max nonzerors per local
3368    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3369    In this case, the values of d_nz,o_nz are:
3370 .vb
3371      proc0 : dnz = 2, o_nz = 2
3372      proc1 : dnz = 3, o_nz = 2
3373      proc2 : dnz = 1, o_nz = 4
3374 .ve
3375    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3376    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3377    for proc3. i.e we are using 12+15+10=37 storage locations to store
3378    34 values.
3379 
3380    When d_nnz, o_nnz parameters are specified, the storage is specified
3381    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3382    In the above case the values for d_nnz,o_nnz are:
3383 .vb
3384      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3385      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3386      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3387 .ve
3388    Here the space allocated is sum of all the above values i.e 34, and
3389    hence pre-allocation is perfect.
3390 
3391    Level: intermediate
3392 
3393 .keywords: matrix, aij, compressed row, sparse, parallel
3394 
3395 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3396           MPIAIJ, MatGetInfo()
3397 @*/
3398 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3399 {
3400   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3401 
3402   PetscFunctionBegin;
3403   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3404   if (f) {
3405     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3406   }
3407   PetscFunctionReturn(0);
3408 }
3409 
3410 #undef __FUNCT__
3411 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3412 /*@
3413      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3414          CSR format the local rows.
3415 
3416    Collective on MPI_Comm
3417 
3418    Input Parameters:
3419 +  comm - MPI communicator
3420 .  m - number of local rows (Cannot be PETSC_DECIDE)
3421 .  n - This value should be the same as the local size used in creating the
3422        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3423        calculated if N is given) For square matrices n is almost always m.
3424 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3425 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3426 .   i - row indices
3427 .   j - column indices
3428 -   a - matrix values
3429 
3430    Output Parameter:
3431 .   mat - the matrix
3432 
3433    Level: intermediate
3434 
3435    Notes:
3436        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3437      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3438      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3439 
3440        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3441 
3442        The format which is used for the sparse matrix input, is equivalent to a
3443     row-major ordering.. i.e for the following matrix, the input data expected is
3444     as shown:
3445 
3446         1 0 0
3447         2 0 3     P0
3448        -------
3449         4 5 6     P1
3450 
3451      Process0 [P0]: rows_owned=[0,1]
3452         i =  {0,1,3}  [size = nrow+1  = 2+1]
3453         j =  {0,0,2}  [size = nz = 6]
3454         v =  {1,2,3}  [size = nz = 6]
3455 
3456      Process1 [P1]: rows_owned=[2]
3457         i =  {0,3}    [size = nrow+1  = 1+1]
3458         j =  {0,1,2}  [size = nz = 6]
3459         v =  {4,5,6}  [size = nz = 6]
3460 
3461 .keywords: matrix, aij, compressed row, sparse, parallel
3462 
3463 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3464           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3465 @*/
3466 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3467 {
3468   PetscErrorCode ierr;
3469 
3470  PetscFunctionBegin;
3471   if (i[0]) {
3472     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3473   }
3474   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3475   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3476   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3477   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3478   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3479   PetscFunctionReturn(0);
3480 }
3481 
3482 #undef __FUNCT__
3483 #define __FUNCT__ "MatCreateMPIAIJ"
3484 /*@C
3485    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3486    (the default parallel PETSc format).  For good matrix assembly performance
3487    the user should preallocate the matrix storage by setting the parameters
3488    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3489    performance can be increased by more than a factor of 50.
3490 
3491    Collective on MPI_Comm
3492 
3493    Input Parameters:
3494 +  comm - MPI communicator
3495 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3496            This value should be the same as the local size used in creating the
3497            y vector for the matrix-vector product y = Ax.
3498 .  n - This value should be the same as the local size used in creating the
3499        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3500        calculated if N is given) For square matrices n is almost always m.
3501 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3502 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3503 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3504            (same value is used for all local rows)
3505 .  d_nnz - array containing the number of nonzeros in the various rows of the
3506            DIAGONAL portion of the local submatrix (possibly different for each row)
3507            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3508            The size of this array is equal to the number of local rows, i.e 'm'.
3509            You must leave room for the diagonal entry even if it is zero.
3510 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3511            submatrix (same value is used for all local rows).
3512 -  o_nnz - array containing the number of nonzeros in the various rows of the
3513            OFF-DIAGONAL portion of the local submatrix (possibly different for
3514            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3515            structure. The size of this array is equal to the number
3516            of local rows, i.e 'm'.
3517 
3518    Output Parameter:
3519 .  A - the matrix
3520 
3521    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3522    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3523    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3524 
3525    Notes:
3526    If the *_nnz parameter is given then the *_nz parameter is ignored
3527 
3528    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3529    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3530    storage requirements for this matrix.
3531 
3532    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3533    processor than it must be used on all processors that share the object for
3534    that argument.
3535 
3536    The user MUST specify either the local or global matrix dimensions
3537    (possibly both).
3538 
3539    The parallel matrix is partitioned across processors such that the
3540    first m0 rows belong to process 0, the next m1 rows belong to
3541    process 1, the next m2 rows belong to process 2 etc.. where
3542    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3543    values corresponding to [m x N] submatrix.
3544 
3545    The columns are logically partitioned with the n0 columns belonging
3546    to 0th partition, the next n1 columns belonging to the next
3547    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3548 
3549    The DIAGONAL portion of the local submatrix on any given processor
3550    is the submatrix corresponding to the rows and columns m,n
3551    corresponding to the given processor. i.e diagonal matrix on
3552    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3553    etc. The remaining portion of the local submatrix [m x (N-n)]
3554    constitute the OFF-DIAGONAL portion. The example below better
3555    illustrates this concept.
3556 
3557    For a square global matrix we define each processor's diagonal portion
3558    to be its local rows and the corresponding columns (a square submatrix);
3559    each processor's off-diagonal portion encompasses the remainder of the
3560    local matrix (a rectangular submatrix).
3561 
3562    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3563 
3564    When calling this routine with a single process communicator, a matrix of
3565    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3566    type of communicator, use the construction mechanism:
3567      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3568 
3569    By default, this format uses inodes (identical nodes) when possible.
3570    We search for consecutive rows with the same nonzero structure, thereby
3571    reusing matrix information to achieve increased efficiency.
3572 
3573    Options Database Keys:
3574 +  -mat_no_inode  - Do not use inodes
3575 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3576 -  -mat_aij_oneindex - Internally use indexing starting at 1
3577         rather than 0.  Note that when calling MatSetValues(),
3578         the user still MUST index entries starting at 0!
3579 
3580 
3581    Example usage:
3582 
3583    Consider the following 8x8 matrix with 34 non-zero values, that is
3584    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3585    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3586    as follows:
3587 
3588 .vb
3589             1  2  0  |  0  3  0  |  0  4
3590     Proc0   0  5  6  |  7  0  0  |  8  0
3591             9  0 10  | 11  0  0  | 12  0
3592     -------------------------------------
3593            13  0 14  | 15 16 17  |  0  0
3594     Proc1   0 18  0  | 19 20 21  |  0  0
3595             0  0  0  | 22 23  0  | 24  0
3596     -------------------------------------
3597     Proc2  25 26 27  |  0  0 28  | 29  0
3598            30  0  0  | 31 32 33  |  0 34
3599 .ve
3600 
3601    This can be represented as a collection of submatrices as:
3602 
3603 .vb
3604       A B C
3605       D E F
3606       G H I
3607 .ve
3608 
3609    Where the submatrices A,B,C are owned by proc0, D,E,F are
3610    owned by proc1, G,H,I are owned by proc2.
3611 
3612    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3613    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3614    The 'M','N' parameters are 8,8, and have the same values on all procs.
3615 
3616    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3617    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3618    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3619    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3620    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3621    matrix, ans [DF] as another SeqAIJ matrix.
3622 
3623    When d_nz, o_nz parameters are specified, d_nz storage elements are
3624    allocated for every row of the local diagonal submatrix, and o_nz
3625    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3626    One way to choose d_nz and o_nz is to use the max nonzerors per local
3627    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3628    In this case, the values of d_nz,o_nz are:
3629 .vb
3630      proc0 : dnz = 2, o_nz = 2
3631      proc1 : dnz = 3, o_nz = 2
3632      proc2 : dnz = 1, o_nz = 4
3633 .ve
3634    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3635    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3636    for proc3. i.e we are using 12+15+10=37 storage locations to store
3637    34 values.
3638 
3639    When d_nnz, o_nnz parameters are specified, the storage is specified
3640    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3641    In the above case the values for d_nnz,o_nnz are:
3642 .vb
3643      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3644      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3645      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3646 .ve
3647    Here the space allocated is sum of all the above values i.e 34, and
3648    hence pre-allocation is perfect.
3649 
3650    Level: intermediate
3651 
3652 .keywords: matrix, aij, compressed row, sparse, parallel
3653 
3654 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3655           MPIAIJ, MatCreateMPIAIJWithArrays()
3656 @*/
3657 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3658 {
3659   PetscErrorCode ierr;
3660   PetscMPIInt    size;
3661 
3662   PetscFunctionBegin;
3663   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3664   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3665   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3666   if (size > 1) {
3667     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3668     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3669   } else {
3670     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3671     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3672   }
3673   PetscFunctionReturn(0);
3674 }
3675 
3676 #undef __FUNCT__
3677 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3678 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3679 {
3680   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3681 
3682   PetscFunctionBegin;
3683   *Ad     = a->A;
3684   *Ao     = a->B;
3685   *colmap = a->garray;
3686   PetscFunctionReturn(0);
3687 }
3688 
3689 #undef __FUNCT__
3690 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3691 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3692 {
3693   PetscErrorCode ierr;
3694   PetscInt       i;
3695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3696 
3697   PetscFunctionBegin;
3698   if (coloring->ctype == IS_COLORING_GLOBAL) {
3699     ISColoringValue *allcolors,*colors;
3700     ISColoring      ocoloring;
3701 
3702     /* set coloring for diagonal portion */
3703     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3704 
3705     /* set coloring for off-diagonal portion */
3706     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3707     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3708     for (i=0; i<a->B->cmap->n; i++) {
3709       colors[i] = allcolors[a->garray[i]];
3710     }
3711     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3712     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3713     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3714     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3715   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3716     ISColoringValue *colors;
3717     PetscInt        *larray;
3718     ISColoring      ocoloring;
3719 
3720     /* set coloring for diagonal portion */
3721     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3722     for (i=0; i<a->A->cmap->n; i++) {
3723       larray[i] = i + A->cmap->rstart;
3724     }
3725     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3726     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3727     for (i=0; i<a->A->cmap->n; i++) {
3728       colors[i] = coloring->colors[larray[i]];
3729     }
3730     ierr = PetscFree(larray);CHKERRQ(ierr);
3731     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3732     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3733     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3734 
3735     /* set coloring for off-diagonal portion */
3736     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3737     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3738     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3739     for (i=0; i<a->B->cmap->n; i++) {
3740       colors[i] = coloring->colors[larray[i]];
3741     }
3742     ierr = PetscFree(larray);CHKERRQ(ierr);
3743     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3744     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3745     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3746   } else {
3747     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3748   }
3749 
3750   PetscFunctionReturn(0);
3751 }
3752 
3753 #if defined(PETSC_HAVE_ADIC)
3754 #undef __FUNCT__
3755 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3756 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3757 {
3758   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3759   PetscErrorCode ierr;
3760 
3761   PetscFunctionBegin;
3762   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3763   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3764   PetscFunctionReturn(0);
3765 }
3766 #endif
3767 
3768 #undef __FUNCT__
3769 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3770 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3771 {
3772   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3773   PetscErrorCode ierr;
3774 
3775   PetscFunctionBegin;
3776   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3777   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3778   PetscFunctionReturn(0);
3779 }
3780 
3781 #undef __FUNCT__
3782 #define __FUNCT__ "MatMerge"
3783 /*@
3784       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3785                  matrices from each processor
3786 
3787     Collective on MPI_Comm
3788 
3789    Input Parameters:
3790 +    comm - the communicators the parallel matrix will live on
3791 .    inmat - the input sequential matrices
3792 .    n - number of local columns (or PETSC_DECIDE)
3793 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3794 
3795    Output Parameter:
3796 .    outmat - the parallel matrix generated
3797 
3798     Level: advanced
3799 
3800    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3801 
3802 @*/
3803 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3804 {
3805   PetscErrorCode ierr;
3806   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3807   PetscInt       *indx;
3808   PetscScalar    *values;
3809 
3810   PetscFunctionBegin;
3811   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3812   if (scall == MAT_INITIAL_MATRIX){
3813     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3814     if (n == PETSC_DECIDE){
3815       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3816     }
3817     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3818     rstart -= m;
3819 
3820     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3821     for (i=0;i<m;i++) {
3822       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3823       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3824       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3825     }
3826     /* This routine will ONLY return MPIAIJ type matrix */
3827     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3828     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3829     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3830     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3831     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3832 
3833   } else if (scall == MAT_REUSE_MATRIX){
3834     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3835   } else {
3836     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3837   }
3838 
3839   for (i=0;i<m;i++) {
3840     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3841     Ii    = i + rstart;
3842     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3843     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3844   }
3845   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3846   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3847   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3848 
3849   PetscFunctionReturn(0);
3850 }
3851 
3852 #undef __FUNCT__
3853 #define __FUNCT__ "MatFileSplit"
3854 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3855 {
3856   PetscErrorCode    ierr;
3857   PetscMPIInt       rank;
3858   PetscInt          m,N,i,rstart,nnz;
3859   size_t            len;
3860   const PetscInt    *indx;
3861   PetscViewer       out;
3862   char              *name;
3863   Mat               B;
3864   const PetscScalar *values;
3865 
3866   PetscFunctionBegin;
3867   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3868   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3869   /* Should this be the type of the diagonal block of A? */
3870   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3871   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3872   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3873   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3874   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3875   for (i=0;i<m;i++) {
3876     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3877     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3878     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3879   }
3880   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3881   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3882 
3883   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3884   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3885   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3886   sprintf(name,"%s.%d",outfile,rank);
3887   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3888   ierr = PetscFree(name);
3889   ierr = MatView(B,out);CHKERRQ(ierr);
3890   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3891   ierr = MatDestroy(B);CHKERRQ(ierr);
3892   PetscFunctionReturn(0);
3893 }
3894 
3895 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3896 #undef __FUNCT__
3897 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3898 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3899 {
3900   PetscErrorCode       ierr;
3901   Mat_Merge_SeqsToMPI  *merge;
3902   PetscContainer       container;
3903 
3904   PetscFunctionBegin;
3905   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3906   if (container) {
3907     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3908     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3909     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3910     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3911     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3912     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3913     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3914     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3915     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3916     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3917     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3918     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3919 
3920     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3921     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3922   }
3923   ierr = PetscFree(merge);CHKERRQ(ierr);
3924 
3925   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3926   PetscFunctionReturn(0);
3927 }
3928 
3929 #include "../src/mat/utils/freespace.h"
3930 #include "petscbt.h"
3931 
3932 #undef __FUNCT__
3933 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3934 /*@C
3935       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3936                  matrices from each processor
3937 
3938     Collective on MPI_Comm
3939 
3940    Input Parameters:
3941 +    comm - the communicators the parallel matrix will live on
3942 .    seqmat - the input sequential matrices
3943 .    m - number of local rows (or PETSC_DECIDE)
3944 .    n - number of local columns (or PETSC_DECIDE)
3945 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3946 
3947    Output Parameter:
3948 .    mpimat - the parallel matrix generated
3949 
3950     Level: advanced
3951 
3952    Notes:
3953      The dimensions of the sequential matrix in each processor MUST be the same.
3954      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3955      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3956 @*/
3957 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3958 {
3959   PetscErrorCode       ierr;
3960   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3961   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3962   PetscMPIInt          size,rank,taga,*len_s;
3963   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
3964   PetscInt             proc,m;
3965   PetscInt             **buf_ri,**buf_rj;
3966   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3967   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3968   MPI_Request          *s_waits,*r_waits;
3969   MPI_Status           *status;
3970   MatScalar            *aa=a->a;
3971   MatScalar            **abuf_r,*ba_i;
3972   Mat_Merge_SeqsToMPI  *merge;
3973   PetscContainer       container;
3974 
3975   PetscFunctionBegin;
3976   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3977 
3978   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3979   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3980 
3981   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3982   if (container) {
3983     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3984   }
3985   bi     = merge->bi;
3986   bj     = merge->bj;
3987   buf_ri = merge->buf_ri;
3988   buf_rj = merge->buf_rj;
3989 
3990   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3991   owners = merge->rowmap.range;
3992   len_s  = merge->len_s;
3993 
3994   /* send and recv matrix values */
3995   /*-----------------------------*/
3996   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3997   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3998 
3999   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4000   for (proc=0,k=0; proc<size; proc++){
4001     if (!len_s[proc]) continue;
4002     i = owners[proc];
4003     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4004     k++;
4005   }
4006 
4007   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4008   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4009   ierr = PetscFree(status);CHKERRQ(ierr);
4010 
4011   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4012   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4013 
4014   /* insert mat values of mpimat */
4015   /*----------------------------*/
4016   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4017   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4018   nextrow = buf_ri_k + merge->nrecv;
4019   nextai  = nextrow + merge->nrecv;
4020 
4021   for (k=0; k<merge->nrecv; k++){
4022     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4023     nrows = *(buf_ri_k[k]);
4024     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4025     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4026   }
4027 
4028   /* set values of ba */
4029   m = merge->rowmap.n;
4030   for (i=0; i<m; i++) {
4031     arow = owners[rank] + i;
4032     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4033     bnzi = bi[i+1] - bi[i];
4034     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4035 
4036     /* add local non-zero vals of this proc's seqmat into ba */
4037     anzi = ai[arow+1] - ai[arow];
4038     aj   = a->j + ai[arow];
4039     aa   = a->a + ai[arow];
4040     nextaj = 0;
4041     for (j=0; nextaj<anzi; j++){
4042       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4043         ba_i[j] += aa[nextaj++];
4044       }
4045     }
4046 
4047     /* add received vals into ba */
4048     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4049       /* i-th row */
4050       if (i == *nextrow[k]) {
4051         anzi = *(nextai[k]+1) - *nextai[k];
4052         aj   = buf_rj[k] + *(nextai[k]);
4053         aa   = abuf_r[k] + *(nextai[k]);
4054         nextaj = 0;
4055         for (j=0; nextaj<anzi; j++){
4056           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4057             ba_i[j] += aa[nextaj++];
4058           }
4059         }
4060         nextrow[k]++; nextai[k]++;
4061       }
4062     }
4063     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4064   }
4065   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4066   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4067 
4068   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4069   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4070   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4071   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4072   PetscFunctionReturn(0);
4073 }
4074 
4075 #undef __FUNCT__
4076 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4077 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4078 {
4079   PetscErrorCode       ierr;
4080   Mat                  B_mpi;
4081   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4082   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4083   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4084   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4085   PetscInt             len,proc,*dnz,*onz;
4086   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4087   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4088   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4089   MPI_Status           *status;
4090   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4091   PetscBT              lnkbt;
4092   Mat_Merge_SeqsToMPI  *merge;
4093   PetscContainer       container;
4094 
4095   PetscFunctionBegin;
4096   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4097 
4098   /* make sure it is a PETSc comm */
4099   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4100   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4101   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4102 
4103   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4104   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4105 
4106   /* determine row ownership */
4107   /*---------------------------------------------------------*/
4108   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4109   merge->rowmap.n = m;
4110   merge->rowmap.N = M;
4111   merge->rowmap.bs = 1;
4112   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4113   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4114   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4115 
4116   m      = merge->rowmap.n;
4117   M      = merge->rowmap.N;
4118   owners = merge->rowmap.range;
4119 
4120   /* determine the number of messages to send, their lengths */
4121   /*---------------------------------------------------------*/
4122   len_s  = merge->len_s;
4123 
4124   len = 0;  /* length of buf_si[] */
4125   merge->nsend = 0;
4126   for (proc=0; proc<size; proc++){
4127     len_si[proc] = 0;
4128     if (proc == rank){
4129       len_s[proc] = 0;
4130     } else {
4131       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4132       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4133     }
4134     if (len_s[proc]) {
4135       merge->nsend++;
4136       nrows = 0;
4137       for (i=owners[proc]; i<owners[proc+1]; i++){
4138         if (ai[i+1] > ai[i]) nrows++;
4139       }
4140       len_si[proc] = 2*(nrows+1);
4141       len += len_si[proc];
4142     }
4143   }
4144 
4145   /* determine the number and length of messages to receive for ij-structure */
4146   /*-------------------------------------------------------------------------*/
4147   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4148   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4149 
4150   /* post the Irecv of j-structure */
4151   /*-------------------------------*/
4152   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4153   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4154 
4155   /* post the Isend of j-structure */
4156   /*--------------------------------*/
4157   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4158   sj_waits = si_waits + merge->nsend;
4159 
4160   for (proc=0, k=0; proc<size; proc++){
4161     if (!len_s[proc]) continue;
4162     i = owners[proc];
4163     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4164     k++;
4165   }
4166 
4167   /* receives and sends of j-structure are complete */
4168   /*------------------------------------------------*/
4169   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4170   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4171 
4172   /* send and recv i-structure */
4173   /*---------------------------*/
4174   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4175   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4176 
4177   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4178   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4179   for (proc=0,k=0; proc<size; proc++){
4180     if (!len_s[proc]) continue;
4181     /* form outgoing message for i-structure:
4182          buf_si[0]:                 nrows to be sent
4183                [1:nrows]:           row index (global)
4184                [nrows+1:2*nrows+1]: i-structure index
4185     */
4186     /*-------------------------------------------*/
4187     nrows = len_si[proc]/2 - 1;
4188     buf_si_i    = buf_si + nrows+1;
4189     buf_si[0]   = nrows;
4190     buf_si_i[0] = 0;
4191     nrows = 0;
4192     for (i=owners[proc]; i<owners[proc+1]; i++){
4193       anzi = ai[i+1] - ai[i];
4194       if (anzi) {
4195         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4196         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4197         nrows++;
4198       }
4199     }
4200     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4201     k++;
4202     buf_si += len_si[proc];
4203   }
4204 
4205   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4206   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4207 
4208   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4209   for (i=0; i<merge->nrecv; i++){
4210     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4211   }
4212 
4213   ierr = PetscFree(len_si);CHKERRQ(ierr);
4214   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4215   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4216   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4217   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4218   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4219   ierr = PetscFree(status);CHKERRQ(ierr);
4220 
4221   /* compute a local seq matrix in each processor */
4222   /*----------------------------------------------*/
4223   /* allocate bi array and free space for accumulating nonzero column info */
4224   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4225   bi[0] = 0;
4226 
4227   /* create and initialize a linked list */
4228   nlnk = N+1;
4229   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4230 
4231   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4232   len = 0;
4233   len  = ai[owners[rank+1]] - ai[owners[rank]];
4234   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4235   current_space = free_space;
4236 
4237   /* determine symbolic info for each local row */
4238   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4239   nextrow = buf_ri_k + merge->nrecv;
4240   nextai  = nextrow + merge->nrecv;
4241   for (k=0; k<merge->nrecv; k++){
4242     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4243     nrows = *buf_ri_k[k];
4244     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4245     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4246   }
4247 
4248   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4249   len = 0;
4250   for (i=0;i<m;i++) {
4251     bnzi   = 0;
4252     /* add local non-zero cols of this proc's seqmat into lnk */
4253     arow   = owners[rank] + i;
4254     anzi   = ai[arow+1] - ai[arow];
4255     aj     = a->j + ai[arow];
4256     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4257     bnzi += nlnk;
4258     /* add received col data into lnk */
4259     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4260       if (i == *nextrow[k]) { /* i-th row */
4261         anzi = *(nextai[k]+1) - *nextai[k];
4262         aj   = buf_rj[k] + *nextai[k];
4263         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4264         bnzi += nlnk;
4265         nextrow[k]++; nextai[k]++;
4266       }
4267     }
4268     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4269 
4270     /* if free space is not available, make more free space */
4271     if (current_space->local_remaining<bnzi) {
4272       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4273       nspacedouble++;
4274     }
4275     /* copy data into free space, then initialize lnk */
4276     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4277     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4278 
4279     current_space->array           += bnzi;
4280     current_space->local_used      += bnzi;
4281     current_space->local_remaining -= bnzi;
4282 
4283     bi[i+1] = bi[i] + bnzi;
4284   }
4285 
4286   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4287 
4288   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4289   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4290   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4291 
4292   /* create symbolic parallel matrix B_mpi */
4293   /*---------------------------------------*/
4294   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4295   if (n==PETSC_DECIDE) {
4296     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4297   } else {
4298     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4299   }
4300   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4301   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4302   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4303 
4304   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4305   B_mpi->assembled     = PETSC_FALSE;
4306   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4307   merge->bi            = bi;
4308   merge->bj            = bj;
4309   merge->buf_ri        = buf_ri;
4310   merge->buf_rj        = buf_rj;
4311   merge->coi           = PETSC_NULL;
4312   merge->coj           = PETSC_NULL;
4313   merge->owners_co     = PETSC_NULL;
4314 
4315   /* attach the supporting struct to B_mpi for reuse */
4316   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4317   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4318   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4319   *mpimat = B_mpi;
4320 
4321   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4322   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4323   PetscFunctionReturn(0);
4324 }
4325 
4326 #undef __FUNCT__
4327 #define __FUNCT__ "MatMerge_SeqsToMPI"
4328 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4329 {
4330   PetscErrorCode   ierr;
4331 
4332   PetscFunctionBegin;
4333   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4334   if (scall == MAT_INITIAL_MATRIX){
4335     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4336   }
4337   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4338   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4339   PetscFunctionReturn(0);
4340 }
4341 
4342 #undef __FUNCT__
4343 #define __FUNCT__ "MatGetLocalMat"
4344 /*@
4345      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4346 
4347     Not Collective
4348 
4349    Input Parameters:
4350 +    A - the matrix
4351 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4352 
4353    Output Parameter:
4354 .    A_loc - the local sequential matrix generated
4355 
4356     Level: developer
4357 
4358 @*/
4359 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4360 {
4361   PetscErrorCode  ierr;
4362   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4363   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4364   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4365   MatScalar       *aa=a->a,*ba=b->a,*cam;
4366   PetscScalar     *ca;
4367   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4368   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4369 
4370   PetscFunctionBegin;
4371   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4372   if (scall == MAT_INITIAL_MATRIX){
4373     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4374     ci[0] = 0;
4375     for (i=0; i<am; i++){
4376       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4377     }
4378     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4379     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4380     k = 0;
4381     for (i=0; i<am; i++) {
4382       ncols_o = bi[i+1] - bi[i];
4383       ncols_d = ai[i+1] - ai[i];
4384       /* off-diagonal portion of A */
4385       for (jo=0; jo<ncols_o; jo++) {
4386         col = cmap[*bj];
4387         if (col >= cstart) break;
4388         cj[k]   = col; bj++;
4389         ca[k++] = *ba++;
4390       }
4391       /* diagonal portion of A */
4392       for (j=0; j<ncols_d; j++) {
4393         cj[k]   = cstart + *aj++;
4394         ca[k++] = *aa++;
4395       }
4396       /* off-diagonal portion of A */
4397       for (j=jo; j<ncols_o; j++) {
4398         cj[k]   = cmap[*bj++];
4399         ca[k++] = *ba++;
4400       }
4401     }
4402     /* put together the new matrix */
4403     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4404     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4405     /* Since these are PETSc arrays, change flags to free them as necessary. */
4406     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4407     mat->free_a  = PETSC_TRUE;
4408     mat->free_ij = PETSC_TRUE;
4409     mat->nonew   = 0;
4410   } else if (scall == MAT_REUSE_MATRIX){
4411     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4412     ci = mat->i; cj = mat->j; cam = mat->a;
4413     for (i=0; i<am; i++) {
4414       /* off-diagonal portion of A */
4415       ncols_o = bi[i+1] - bi[i];
4416       for (jo=0; jo<ncols_o; jo++) {
4417         col = cmap[*bj];
4418         if (col >= cstart) break;
4419         *cam++ = *ba++; bj++;
4420       }
4421       /* diagonal portion of A */
4422       ncols_d = ai[i+1] - ai[i];
4423       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4424       /* off-diagonal portion of A */
4425       for (j=jo; j<ncols_o; j++) {
4426         *cam++ = *ba++; bj++;
4427       }
4428     }
4429   } else {
4430     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4431   }
4432 
4433   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4434   PetscFunctionReturn(0);
4435 }
4436 
4437 #undef __FUNCT__
4438 #define __FUNCT__ "MatGetLocalMatCondensed"
4439 /*@C
4440      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4441 
4442     Not Collective
4443 
4444    Input Parameters:
4445 +    A - the matrix
4446 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4447 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4448 
4449    Output Parameter:
4450 .    A_loc - the local sequential matrix generated
4451 
4452     Level: developer
4453 
4454 @*/
4455 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4456 {
4457   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4458   PetscErrorCode    ierr;
4459   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4460   IS                isrowa,iscola;
4461   Mat               *aloc;
4462 
4463   PetscFunctionBegin;
4464   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4465   if (!row){
4466     start = A->rmap->rstart; end = A->rmap->rend;
4467     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4468   } else {
4469     isrowa = *row;
4470   }
4471   if (!col){
4472     start = A->cmap->rstart;
4473     cmap  = a->garray;
4474     nzA   = a->A->cmap->n;
4475     nzB   = a->B->cmap->n;
4476     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4477     ncols = 0;
4478     for (i=0; i<nzB; i++) {
4479       if (cmap[i] < start) idx[ncols++] = cmap[i];
4480       else break;
4481     }
4482     imark = i;
4483     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4484     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4485     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4486     ierr = PetscFree(idx);CHKERRQ(ierr);
4487   } else {
4488     iscola = *col;
4489   }
4490   if (scall != MAT_INITIAL_MATRIX){
4491     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4492     aloc[0] = *A_loc;
4493   }
4494   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4495   *A_loc = aloc[0];
4496   ierr = PetscFree(aloc);CHKERRQ(ierr);
4497   if (!row){
4498     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4499   }
4500   if (!col){
4501     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4502   }
4503   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4504   PetscFunctionReturn(0);
4505 }
4506 
4507 #undef __FUNCT__
4508 #define __FUNCT__ "MatGetBrowsOfAcols"
4509 /*@C
4510     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4511 
4512     Collective on Mat
4513 
4514    Input Parameters:
4515 +    A,B - the matrices in mpiaij format
4516 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4517 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4518 
4519    Output Parameter:
4520 +    rowb, colb - index sets of rows and columns of B to extract
4521 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4522 -    B_seq - the sequential matrix generated
4523 
4524     Level: developer
4525 
4526 @*/
4527 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4528 {
4529   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4530   PetscErrorCode    ierr;
4531   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4532   IS                isrowb,iscolb;
4533   Mat               *bseq;
4534 
4535   PetscFunctionBegin;
4536   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4537     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4538   }
4539   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4540 
4541   if (scall == MAT_INITIAL_MATRIX){
4542     start = A->cmap->rstart;
4543     cmap  = a->garray;
4544     nzA   = a->A->cmap->n;
4545     nzB   = a->B->cmap->n;
4546     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4547     ncols = 0;
4548     for (i=0; i<nzB; i++) {  /* row < local row index */
4549       if (cmap[i] < start) idx[ncols++] = cmap[i];
4550       else break;
4551     }
4552     imark = i;
4553     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4554     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4555     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4556     ierr = PetscFree(idx);CHKERRQ(ierr);
4557     *brstart = imark;
4558     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4559   } else {
4560     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4561     isrowb = *rowb; iscolb = *colb;
4562     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4563     bseq[0] = *B_seq;
4564   }
4565   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4566   *B_seq = bseq[0];
4567   ierr = PetscFree(bseq);CHKERRQ(ierr);
4568   if (!rowb){
4569     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4570   } else {
4571     *rowb = isrowb;
4572   }
4573   if (!colb){
4574     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4575   } else {
4576     *colb = iscolb;
4577   }
4578   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4579   PetscFunctionReturn(0);
4580 }
4581 
4582 #undef __FUNCT__
4583 #define __FUNCT__ "MatGetBrowsOfAoCols"
4584 /*@C
4585     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4586     of the OFF-DIAGONAL portion of local A
4587 
4588     Collective on Mat
4589 
4590    Input Parameters:
4591 +    A,B - the matrices in mpiaij format
4592 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4593 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4594 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4595 
4596    Output Parameter:
4597 +    B_oth - the sequential matrix generated
4598 
4599     Level: developer
4600 
4601 @*/
4602 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4603 {
4604   VecScatter_MPI_General *gen_to,*gen_from;
4605   PetscErrorCode         ierr;
4606   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4607   Mat_SeqAIJ             *b_oth;
4608   VecScatter             ctx=a->Mvctx;
4609   MPI_Comm               comm=((PetscObject)ctx)->comm;
4610   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4611   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4612   PetscScalar            *rvalues,*svalues;
4613   MatScalar              *b_otha,*bufa,*bufA;
4614   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4615   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4616   MPI_Status             *sstatus,rstatus;
4617   PetscMPIInt            jj;
4618   PetscInt               *cols,sbs,rbs;
4619   PetscScalar            *vals;
4620 
4621   PetscFunctionBegin;
4622   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4623     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4624   }
4625   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4626   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4627 
4628   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4629   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4630   rvalues  = gen_from->values; /* holds the length of receiving row */
4631   svalues  = gen_to->values;   /* holds the length of sending row */
4632   nrecvs   = gen_from->n;
4633   nsends   = gen_to->n;
4634 
4635   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4636   srow     = gen_to->indices;   /* local row index to be sent */
4637   sstarts  = gen_to->starts;
4638   sprocs   = gen_to->procs;
4639   sstatus  = gen_to->sstatus;
4640   sbs      = gen_to->bs;
4641   rstarts  = gen_from->starts;
4642   rprocs   = gen_from->procs;
4643   rbs      = gen_from->bs;
4644 
4645   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4646   if (scall == MAT_INITIAL_MATRIX){
4647     /* i-array */
4648     /*---------*/
4649     /*  post receives */
4650     for (i=0; i<nrecvs; i++){
4651       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4652       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4653       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4654     }
4655 
4656     /* pack the outgoing message */
4657     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4658     rstartsj = sstartsj + nsends +1;
4659     sstartsj[0] = 0;  rstartsj[0] = 0;
4660     len = 0; /* total length of j or a array to be sent */
4661     k = 0;
4662     for (i=0; i<nsends; i++){
4663       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4664       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4665       for (j=0; j<nrows; j++) {
4666         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4667         for (l=0; l<sbs; l++){
4668           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4669           rowlen[j*sbs+l] = ncols;
4670           len += ncols;
4671           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4672         }
4673         k++;
4674       }
4675       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4676       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4677     }
4678     /* recvs and sends of i-array are completed */
4679     i = nrecvs;
4680     while (i--) {
4681       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4682     }
4683     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4684 
4685     /* allocate buffers for sending j and a arrays */
4686     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4687     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4688 
4689     /* create i-array of B_oth */
4690     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4691     b_othi[0] = 0;
4692     len = 0; /* total length of j or a array to be received */
4693     k = 0;
4694     for (i=0; i<nrecvs; i++){
4695       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4696       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4697       for (j=0; j<nrows; j++) {
4698         b_othi[k+1] = b_othi[k] + rowlen[j];
4699         len += rowlen[j]; k++;
4700       }
4701       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4702     }
4703 
4704     /* allocate space for j and a arrrays of B_oth */
4705     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4706     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4707 
4708     /* j-array */
4709     /*---------*/
4710     /*  post receives of j-array */
4711     for (i=0; i<nrecvs; i++){
4712       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4713       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4714     }
4715 
4716     /* pack the outgoing message j-array */
4717     k = 0;
4718     for (i=0; i<nsends; i++){
4719       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4720       bufJ = bufj+sstartsj[i];
4721       for (j=0; j<nrows; j++) {
4722         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4723         for (ll=0; ll<sbs; ll++){
4724           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4725           for (l=0; l<ncols; l++){
4726             *bufJ++ = cols[l];
4727           }
4728           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4729         }
4730       }
4731       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4732     }
4733 
4734     /* recvs and sends of j-array are completed */
4735     i = nrecvs;
4736     while (i--) {
4737       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4738     }
4739     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4740   } else if (scall == MAT_REUSE_MATRIX){
4741     sstartsj = *startsj;
4742     rstartsj = sstartsj + nsends +1;
4743     bufa     = *bufa_ptr;
4744     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4745     b_otha   = b_oth->a;
4746   } else {
4747     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4748   }
4749 
4750   /* a-array */
4751   /*---------*/
4752   /*  post receives of a-array */
4753   for (i=0; i<nrecvs; i++){
4754     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4755     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4756   }
4757 
4758   /* pack the outgoing message a-array */
4759   k = 0;
4760   for (i=0; i<nsends; i++){
4761     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4762     bufA = bufa+sstartsj[i];
4763     for (j=0; j<nrows; j++) {
4764       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4765       for (ll=0; ll<sbs; ll++){
4766         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4767         for (l=0; l<ncols; l++){
4768           *bufA++ = vals[l];
4769         }
4770         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4771       }
4772     }
4773     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4774   }
4775   /* recvs and sends of a-array are completed */
4776   i = nrecvs;
4777   while (i--) {
4778     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4779   }
4780   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4781   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4782 
4783   if (scall == MAT_INITIAL_MATRIX){
4784     /* put together the new matrix */
4785     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4786 
4787     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4788     /* Since these are PETSc arrays, change flags to free them as necessary. */
4789     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4790     b_oth->free_a  = PETSC_TRUE;
4791     b_oth->free_ij = PETSC_TRUE;
4792     b_oth->nonew   = 0;
4793 
4794     ierr = PetscFree(bufj);CHKERRQ(ierr);
4795     if (!startsj || !bufa_ptr){
4796       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4797       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4798     } else {
4799       *startsj  = sstartsj;
4800       *bufa_ptr = bufa;
4801     }
4802   }
4803   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4804   PetscFunctionReturn(0);
4805 }
4806 
4807 #undef __FUNCT__
4808 #define __FUNCT__ "MatGetCommunicationStructs"
4809 /*@C
4810   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4811 
4812   Not Collective
4813 
4814   Input Parameters:
4815 . A - The matrix in mpiaij format
4816 
4817   Output Parameter:
4818 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4819 . colmap - A map from global column index to local index into lvec
4820 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4821 
4822   Level: developer
4823 
4824 @*/
4825 #if defined (PETSC_USE_CTABLE)
4826 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4827 #else
4828 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4829 #endif
4830 {
4831   Mat_MPIAIJ *a;
4832 
4833   PetscFunctionBegin;
4834   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4835   PetscValidPointer(lvec, 2)
4836   PetscValidPointer(colmap, 3)
4837   PetscValidPointer(multScatter, 4)
4838   a = (Mat_MPIAIJ *) A->data;
4839   if (lvec) *lvec = a->lvec;
4840   if (colmap) *colmap = a->colmap;
4841   if (multScatter) *multScatter = a->Mvctx;
4842   PetscFunctionReturn(0);
4843 }
4844 
4845 EXTERN_C_BEGIN
4846 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4847 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4848 EXTERN_C_END
4849 
4850 #include "../src/mat/impls/dense/mpi/mpidense.h"
4851 
4852 #undef __FUNCT__
4853 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4854 /*
4855     Computes (B'*A')' since computing B*A directly is untenable
4856 
4857                n                       p                          p
4858         (              )       (              )         (                  )
4859       m (      A       )  *  n (       B      )   =   m (         C        )
4860         (              )       (              )         (                  )
4861 
4862 */
4863 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4864 {
4865   PetscErrorCode     ierr;
4866   Mat                At,Bt,Ct;
4867 
4868   PetscFunctionBegin;
4869   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4870   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4871   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4872   ierr = MatDestroy(At);CHKERRQ(ierr);
4873   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4874   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4875   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4876   PetscFunctionReturn(0);
4877 }
4878 
4879 #undef __FUNCT__
4880 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4881 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4882 {
4883   PetscErrorCode ierr;
4884   PetscInt       m=A->rmap->n,n=B->cmap->n;
4885   Mat            Cmat;
4886 
4887   PetscFunctionBegin;
4888   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4889   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4890   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4891   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4892   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4893   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4894   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4895   *C   = Cmat;
4896   PetscFunctionReturn(0);
4897 }
4898 
4899 /* ----------------------------------------------------------------*/
4900 #undef __FUNCT__
4901 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4902 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4903 {
4904   PetscErrorCode ierr;
4905 
4906   PetscFunctionBegin;
4907   if (scall == MAT_INITIAL_MATRIX){
4908     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4909   }
4910   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4911   PetscFunctionReturn(0);
4912 }
4913 
4914 EXTERN_C_BEGIN
4915 #if defined(PETSC_HAVE_MUMPS)
4916 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4917 #endif
4918 #if defined(PETSC_HAVE_PASTIX)
4919 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4920 #endif
4921 #if defined(PETSC_HAVE_SUPERLU_DIST)
4922 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4923 #endif
4924 #if defined(PETSC_HAVE_SPOOLES)
4925 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4926 #endif
4927 EXTERN_C_END
4928 
4929 /*MC
4930    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4931 
4932    Options Database Keys:
4933 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4934 
4935   Level: beginner
4936 
4937 .seealso: MatCreateMPIAIJ()
4938 M*/
4939 
4940 EXTERN_C_BEGIN
4941 #undef __FUNCT__
4942 #define __FUNCT__ "MatCreate_MPIAIJ"
4943 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4944 {
4945   Mat_MPIAIJ     *b;
4946   PetscErrorCode ierr;
4947   PetscMPIInt    size;
4948 
4949   PetscFunctionBegin;
4950   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4951 
4952   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4953   B->data         = (void*)b;
4954   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4955   B->rmap->bs      = 1;
4956   B->assembled    = PETSC_FALSE;
4957   B->mapping      = 0;
4958 
4959   B->insertmode      = NOT_SET_VALUES;
4960   b->size            = size;
4961   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4962 
4963   /* build cache for off array entries formed */
4964   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4965   b->donotstash  = PETSC_FALSE;
4966   b->colmap      = 0;
4967   b->garray      = 0;
4968   b->roworiented = PETSC_TRUE;
4969 
4970   /* stuff used for matrix vector multiply */
4971   b->lvec      = PETSC_NULL;
4972   b->Mvctx     = PETSC_NULL;
4973 
4974   /* stuff for MatGetRow() */
4975   b->rowindices   = 0;
4976   b->rowvalues    = 0;
4977   b->getrowactive = PETSC_FALSE;
4978 
4979 #if defined(PETSC_HAVE_SPOOLES)
4980   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
4981                                      "MatGetFactor_mpiaij_spooles",
4982                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
4983 #endif
4984 #if defined(PETSC_HAVE_MUMPS)
4985   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
4986                                      "MatGetFactor_mpiaij_mumps",
4987                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
4988 #endif
4989 #if defined(PETSC_HAVE_PASTIX)
4990   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
4991 					   "MatGetFactor_mpiaij_pastix",
4992 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
4993 #endif
4994 #if defined(PETSC_HAVE_SUPERLU_DIST)
4995   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
4996                                      "MatGetFactor_mpiaij_superlu_dist",
4997                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
4998 #endif
4999   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5000                                      "MatStoreValues_MPIAIJ",
5001                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5002   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5003                                      "MatRetrieveValues_MPIAIJ",
5004                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5005   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5006 				     "MatGetDiagonalBlock_MPIAIJ",
5007                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5008   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5009 				     "MatIsTranspose_MPIAIJ",
5010 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5011   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5012 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5013 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5014   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5015 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5016 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5017   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5018 				     "MatDiagonalScaleLocal_MPIAIJ",
5019 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5020   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5021                                      "MatConvert_MPIAIJ_MPICSRPERM",
5022                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5023   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5024                                      "MatConvert_MPIAIJ_MPICRL",
5025                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5026   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5027                                      "MatMatMult_MPIDense_MPIAIJ",
5028                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5029   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5030                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5031                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5032   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5033                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5034                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5035   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5036   PetscFunctionReturn(0);
5037 }
5038 EXTERN_C_END
5039 
5040 #undef __FUNCT__
5041 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5042 /*@
5043      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5044          and "off-diagonal" part of the matrix in CSR format.
5045 
5046    Collective on MPI_Comm
5047 
5048    Input Parameters:
5049 +  comm - MPI communicator
5050 .  m - number of local rows (Cannot be PETSC_DECIDE)
5051 .  n - This value should be the same as the local size used in creating the
5052        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5053        calculated if N is given) For square matrices n is almost always m.
5054 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5055 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5056 .   i - row indices for "diagonal" portion of matrix
5057 .   j - column indices
5058 .   a - matrix values
5059 .   oi - row indices for "off-diagonal" portion of matrix
5060 .   oj - column indices
5061 -   oa - matrix values
5062 
5063    Output Parameter:
5064 .   mat - the matrix
5065 
5066    Level: advanced
5067 
5068    Notes:
5069        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5070 
5071        The i and j indices are 0 based
5072 
5073        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5074 
5075 
5076 .keywords: matrix, aij, compressed row, sparse, parallel
5077 
5078 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5079           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5080 @*/
5081 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5082 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5083 {
5084   PetscErrorCode ierr;
5085   Mat_MPIAIJ     *maij;
5086 
5087  PetscFunctionBegin;
5088   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5089   if (i[0]) {
5090     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5091   }
5092   if (oi[0]) {
5093     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5094   }
5095   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5096   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5097   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5098   maij = (Mat_MPIAIJ*) (*mat)->data;
5099   maij->donotstash     = PETSC_TRUE;
5100   (*mat)->preallocated = PETSC_TRUE;
5101 
5102   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5103   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5104   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5105   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5106 
5107   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5108   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5109 
5110   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5111   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5112   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5113   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5114 
5115   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5116   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5117   PetscFunctionReturn(0);
5118 }
5119 
5120 /*
5121     Special version for direct calls from Fortran
5122 */
5123 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5124 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5125 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5126 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5127 #endif
5128 
5129 /* Change these macros so can be used in void function */
5130 #undef CHKERRQ
5131 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5132 #undef SETERRQ2
5133 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5134 #undef SETERRQ
5135 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5136 
5137 EXTERN_C_BEGIN
5138 #undef __FUNCT__
5139 #define __FUNCT__ "matsetvaluesmpiaij_"
5140 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5141 {
5142   Mat             mat = *mmat;
5143   PetscInt        m = *mm, n = *mn;
5144   InsertMode      addv = *maddv;
5145   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5146   PetscScalar     value;
5147   PetscErrorCode  ierr;
5148 
5149   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5150   if (mat->insertmode == NOT_SET_VALUES) {
5151     mat->insertmode = addv;
5152   }
5153 #if defined(PETSC_USE_DEBUG)
5154   else if (mat->insertmode != addv) {
5155     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5156   }
5157 #endif
5158   {
5159   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5160   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5161   PetscTruth      roworiented = aij->roworiented;
5162 
5163   /* Some Variables required in the macro */
5164   Mat             A = aij->A;
5165   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5166   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5167   MatScalar       *aa = a->a;
5168   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5169   Mat             B = aij->B;
5170   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5171   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5172   MatScalar       *ba = b->a;
5173 
5174   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5175   PetscInt        nonew = a->nonew;
5176   MatScalar       *ap1,*ap2;
5177 
5178   PetscFunctionBegin;
5179   for (i=0; i<m; i++) {
5180     if (im[i] < 0) continue;
5181 #if defined(PETSC_USE_DEBUG)
5182     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5183 #endif
5184     if (im[i] >= rstart && im[i] < rend) {
5185       row      = im[i] - rstart;
5186       lastcol1 = -1;
5187       rp1      = aj + ai[row];
5188       ap1      = aa + ai[row];
5189       rmax1    = aimax[row];
5190       nrow1    = ailen[row];
5191       low1     = 0;
5192       high1    = nrow1;
5193       lastcol2 = -1;
5194       rp2      = bj + bi[row];
5195       ap2      = ba + bi[row];
5196       rmax2    = bimax[row];
5197       nrow2    = bilen[row];
5198       low2     = 0;
5199       high2    = nrow2;
5200 
5201       for (j=0; j<n; j++) {
5202         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5203         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5204         if (in[j] >= cstart && in[j] < cend){
5205           col = in[j] - cstart;
5206           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5207         } else if (in[j] < 0) continue;
5208 #if defined(PETSC_USE_DEBUG)
5209         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5210 #endif
5211         else {
5212           if (mat->was_assembled) {
5213             if (!aij->colmap) {
5214               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5215             }
5216 #if defined (PETSC_USE_CTABLE)
5217             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5218 	    col--;
5219 #else
5220             col = aij->colmap[in[j]] - 1;
5221 #endif
5222             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5223               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5224               col =  in[j];
5225               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5226               B = aij->B;
5227               b = (Mat_SeqAIJ*)B->data;
5228               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5229               rp2      = bj + bi[row];
5230               ap2      = ba + bi[row];
5231               rmax2    = bimax[row];
5232               nrow2    = bilen[row];
5233               low2     = 0;
5234               high2    = nrow2;
5235               bm       = aij->B->rmap->n;
5236               ba = b->a;
5237             }
5238           } else col = in[j];
5239           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5240         }
5241       }
5242     } else {
5243       if (!aij->donotstash) {
5244         if (roworiented) {
5245           if (ignorezeroentries && v[i*n] == 0.0) continue;
5246           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5247         } else {
5248           if (ignorezeroentries && v[i] == 0.0) continue;
5249           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5250         }
5251       }
5252     }
5253   }}
5254   PetscFunctionReturnVoid();
5255 }
5256 EXTERN_C_END
5257 
5258