1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "../src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to the other processes */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal entries in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal entries in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
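/*
   Illustrative sketch (not taken from the PETSc documentation) of how the routine
   above can be driven, assuming a SeqAIJ matrix gmat has been assembled on process 0
   and every process knows its number of local rows m; 'dist' is just a placeholder name:

      Mat dist;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ...                                   use dist ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

   The second call reuses the nonzero structure created by the first and only moves
   updated numerical values over from process 0.
*/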
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
180 a slightly higher hash-table cost; without it, it is not scalable (each process
181 holds an order-N integer array) but access is fast.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
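/*
   Sketch of how the colmap built above is consumed later in this file (see
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()); gcol is a placeholder for a
   global column index that falls outside this process's diagonal block.  The map
   stores local indices shifted by one so that 0 can mean "not present":

#if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
      col--;
#else
      col = aij->colmap[gcol] - 1;
#endif

   A result of col == -1 means gcol does not occur in the local off-diagonal part B.
*/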
205 
206 
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
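/*
   The two macros above insert (or add) one value into a row of the "diagonal"
   block A and the off-diagonal block B respectively: rp1/ap1 (rp2/ap2) point at
   the row's column indices and values, a short binary search followed by a linear
   scan locates the column, and if the column is not yet present the row storage
   is grown with MatSeqXAIJReallocateAIJ() and all later entries in the row are
   shifted up one slot to make room for the new (col,value) pair.
*/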
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
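/*
   Layout assumed by MatSetValuesRow_MPIAIJ() above: v[] holds the values of one
   locally owned row ordered by increasing global column index, i.e.

      v = [ B entries left of the diagonal block | diagonal block (A) entries | B entries right of it ]

   which is why the code copies the l leading values into B, then the A part, and
   finally the tail of B.
*/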
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
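/*
   Summary of the dispatch in MatSetValues_MPIAIJ() above: values for locally
   owned rows go straight into the diagonal block (column in [cstart,cend)) or
   the off-diagonal block via the two macros; a column that is not found in the
   colmap of a previously assembled matrix triggers DisAssemble_MPIAIJ() (unless
   new nonzeros are disallowed) so that new nonzero locations can be accommodated;
   values destined for rows owned by other processes are placed in mat->stash and
   communicated later by MatAssemblyBegin_MPIAIJ()/MatAssemblyEnd_MPIAIJ().
*/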
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
415   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processes are using the same insert mode, either INSERT_VALUES or ADD_VALUES */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any process has disassembled; if so we must
517      also disassemble ourselves, in order that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
550 {
551   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
556   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatZeroRows_MPIAIJ"
562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
563 {
564   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
567   PetscInt       i,*owners = A->rmap->range;
568   PetscInt       *nprocs,j,idx,nsends,row;
569   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
570   PetscInt       *rvalues,count,base,slen,*source;
571   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
572   MPI_Comm       comm = ((PetscObject)A)->comm;
573   MPI_Request    *send_waits,*recv_waits;
574   MPI_Status     recv_status,*send_status;
575 #if defined(PETSC_DEBUG)
576   PetscTruth     found = PETSC_FALSE;
577 #endif
578 
579   PetscFunctionBegin;
580   /*  first count number of contributors to each processor */
581   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
582   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
583   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
584   j = 0;
585   for (i=0; i<N; i++) {
586     if (lastidx > (idx = rows[i])) j = 0;
587     lastidx = idx;
588     for (; j<size; j++) {
589       if (idx >= owners[j] && idx < owners[j+1]) {
590         nprocs[2*j]++;
591         nprocs[2*j+1] = 1;
592         owner[i] = j;
593 #if defined(PETSC_DEBUG)
594         found = PETSC_TRUE;
595 #endif
596         break;
597       }
598     }
599 #if defined(PETSC_DEBUG)
600     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
601     found = PETSC_FALSE;
602 #endif
603   }
604   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
605 
606   /* inform the other processes of the number of messages and the max length */
607   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
608 
609   /* post receives:   */
610   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
611   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
612   for (i=0; i<nrecvs; i++) {
613     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
614   }
615 
616   /* do sends:
617       1) starts[i] gives the starting index in svalues for stuff going to
618          the ith processor
619   */
620   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
621   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
622   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
623   starts[0] = 0;
624   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
625   for (i=0; i<N; i++) {
626     svalues[starts[owner[i]]++] = rows[i];
627   }
628 
629   starts[0] = 0;
630   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
631   count = 0;
632   for (i=0; i<size; i++) {
633     if (nprocs[2*i+1]) {
634       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
635     }
636   }
637   ierr = PetscFree(starts);CHKERRQ(ierr);
638 
639   base = owners[rank];
640 
641   /*  wait on receives */
642   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
643   source = lens + nrecvs;
644   count  = nrecvs; slen = 0;
645   while (count) {
646     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
647     /* unpack receives into our local space */
648     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
649     source[imdex]  = recv_status.MPI_SOURCE;
650     lens[imdex]    = n;
651     slen          += n;
652     count--;
653   }
654   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
655 
656   /* move the data into the send scatter */
657   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
658   count = 0;
659   for (i=0; i<nrecvs; i++) {
660     values = rvalues + i*nmax;
661     for (j=0; j<lens[i]; j++) {
662       lrows[count++] = values[j] - base;
663     }
664   }
665   ierr = PetscFree(rvalues);CHKERRQ(ierr);
666   ierr = PetscFree(lens);CHKERRQ(ierr);
667   ierr = PetscFree(owner);CHKERRQ(ierr);
668   ierr = PetscFree(nprocs);CHKERRQ(ierr);
669 
670   /* actually zap the local rows */
671   /*
672         Zero the required rows. If the "diagonal block" of the matrix
673      is square and the user wishes to set the diagonal, we use separate
674      code so that MatSetValues() is not called for each diagonal entry,
675      which would allocate new memory (lots of mallocs) and slow things down.
676 
677        Contributed by: Matthew Knepley
678   */
679   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
680   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
681   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
682     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
683   } else if (diag != 0.0) {
684     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
685     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
686       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
687 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
688     }
689     for (i = 0; i < slen; i++) {
690       row  = lrows[i] + rstart;
691       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
692     }
693     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
695   } else {
696     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
697   }
698   ierr = PetscFree(lrows);CHKERRQ(ierr);
699 
700   /* wait on sends */
701   if (nsends) {
702     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
703     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
704     ierr = PetscFree(send_status);CHKERRQ(ierr);
705   }
706   ierr = PetscFree(send_waits);CHKERRQ(ierr);
707   ierr = PetscFree(svalues);CHKERRQ(ierr);
708 
709   PetscFunctionReturn(0);
710 }
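/*
   Communication pattern used by MatZeroRows_MPIAIJ() above: each process bins
   the global row indices it was handed by owning process (owners[] comes from
   A->rmap->range), posts MPI_Irecv/MPI_Isend pairs to exchange the bins, and
   each owner converts the received global indices into local ones (lrows)
   before zeroing its own rows of B and A.
*/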
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatMult_MPIAIJ"
714 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
715 {
716   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode ierr;
718   PetscInt       nt;
719 
720   PetscFunctionBegin;
721   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
722   if (nt != A->cmap->n) {
723     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
724   }
725   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
726   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
727   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
728   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
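/*
   The multiply above computes, on each process,

        yy = A_d * xx_local + A_o * xx_ghost

   where A_d (a->A) is the diagonal block applied to the locally owned part of xx
   and A_o (a->B) is the off-diagonal block applied to the ghost values gathered
   into a->lvec; the VecScatterBegin()/VecScatterEnd() pair overlaps that
   communication with the local product.
*/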
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMultAdd_MPIAIJ"
734 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
735 {
736   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
737   PetscErrorCode ierr;
738 
739   PetscFunctionBegin;
740   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
741   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
742   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
743   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
744   PetscFunctionReturn(0);
745 }
746 
747 #undef __FUNCT__
748 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
749 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
750 {
751   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
752   PetscErrorCode ierr;
753   PetscTruth     merged;
754 
755   PetscFunctionBegin;
756   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
757   /* do nondiagonal part */
758   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
759   if (!merged) {
760     /* send it on its way */
761     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
762     /* do local part */
763     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
764     /* receive remote parts: note this assumes the values are not actually */
765     /* added into yy until the VecScatterEnd() on the next line */
766     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
767   } else {
768     /* do local part */
769     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
770     /* send it on its way */
771     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
772     /* values actually were received in the Begin() but we need to call this nop */
773     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
774   }
775   PetscFunctionReturn(0);
776 }
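/*
   The transpose multiply above forms yy = A_d^T * xx plus the contributions of
   A_o^T * xx, which belong to rows owned by other processes and are therefore
   computed into a->lvec and added into the owners' yy by the reverse scatter
   with ADD_VALUES.
*/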
777 
778 EXTERN_C_BEGIN
779 #undef __FUNCT__
780 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
781 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
782 {
783   MPI_Comm       comm;
784   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
785   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
786   IS             Me,Notme;
787   PetscErrorCode ierr;
788   PetscInt       M,N,first,last,*notme,i;
789   PetscMPIInt    size;
790 
791   PetscFunctionBegin;
792 
793   /* Easy test: symmetric diagonal block */
794   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
795   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
796   if (!*f) PetscFunctionReturn(0);
797   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
798   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
799   if (size == 1) PetscFunctionReturn(0);
800 
801   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
802   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
803   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
804   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
805   for (i=0; i<first; i++) notme[i] = i;
806   for (i=last; i<M; i++) notme[i-last+first] = i;
807   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
808   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
809   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
810   Aoff = Aoffs[0];
811   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
812   Boff = Boffs[0];
813   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
815   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
816   ierr = ISDestroy(Me);CHKERRQ(ierr);
817   ierr = ISDestroy(Notme);CHKERRQ(ierr);
818 
819   PetscFunctionReturn(0);
820 }
821 EXTERN_C_END
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
825 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
826 {
827   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
828   PetscErrorCode ierr;
829 
830   PetscFunctionBegin;
831   /* do nondiagonal part */
832   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
833   /* send it on its way */
834   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
835   /* do local part */
836   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
837   /* receive remote parts */
838   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
839   PetscFunctionReturn(0);
840 }
841 
842 /*
843   This only works correctly for square matrices where the subblock A->A is the
844    diagonal block
845 */
846 #undef __FUNCT__
847 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
848 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
849 {
850   PetscErrorCode ierr;
851   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
852 
853   PetscFunctionBegin;
854   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
855   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
856     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
857   }
858   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 #undef __FUNCT__
863 #define __FUNCT__ "MatScale_MPIAIJ"
864 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
865 {
866   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
867   PetscErrorCode ierr;
868 
869   PetscFunctionBegin;
870   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
871   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
872   PetscFunctionReturn(0);
873 }
874 
875 #undef __FUNCT__
876 #define __FUNCT__ "MatDestroy_MPIAIJ"
877 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
878 {
879   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
880   PetscErrorCode ierr;
881 
882   PetscFunctionBegin;
883 #if defined(PETSC_USE_LOG)
884   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
885 #endif
886   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
887   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
888   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
889   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
890 #if defined (PETSC_USE_CTABLE)
891   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
892 #else
893   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
894 #endif
895   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
896   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
897   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
898   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
899   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
900   ierr = PetscFree(aij);CHKERRQ(ierr);
901 
902   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
908   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
909   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNCT__
914 #define __FUNCT__ "MatView_MPIAIJ_Binary"
915 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
916 {
917   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
918   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
919   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
920   PetscErrorCode    ierr;
921   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
922   int               fd;
923   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
924   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
925   PetscScalar       *column_values;
926 
927   PetscFunctionBegin;
928   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
929   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
930   nz   = A->nz + B->nz;
931   if (!rank) {
932     header[0] = MAT_FILE_COOKIE;
933     header[1] = mat->rmap->N;
934     header[2] = mat->cmap->N;
935     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
936     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
937     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
938     /* get largest number of rows any processor has */
939     rlen = mat->rmap->n;
940     range = mat->rmap->range;
941     for (i=1; i<size; i++) {
942       rlen = PetscMax(rlen,range[i+1] - range[i]);
943     }
944   } else {
945     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
946     rlen = mat->rmap->n;
947   }
948 
949   /* load up the local row counts */
950   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
951   for (i=0; i<mat->rmap->n; i++) {
952     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
953   }
954 
955   /* store the row lengths to the file */
956   if (!rank) {
957     MPI_Status status;
958     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
959     for (i=1; i<size; i++) {
960       rlen = range[i+1] - range[i];
961       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
962       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
963     }
964   } else {
965     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
966   }
967   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
968 
969   /* load up the local column indices */
970   nzmax = nz; /* process 0 needs as much space as the largest amount any process needs */
971   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
972   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
973   cnt  = 0;
974   for (i=0; i<mat->rmap->n; i++) {
975     for (j=B->i[i]; j<B->i[i+1]; j++) {
976       if ( (col = garray[B->j[j]]) > cstart) break;
977       column_indices[cnt++] = col;
978     }
979     for (k=A->i[i]; k<A->i[i+1]; k++) {
980       column_indices[cnt++] = A->j[k] + cstart;
981     }
982     for (; j<B->i[i+1]; j++) {
983       column_indices[cnt++] = garray[B->j[j]];
984     }
985   }
986   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
987 
988   /* store the column indices to the file */
989   if (!rank) {
990     MPI_Status status;
991     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
992     for (i=1; i<size; i++) {
993       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
995       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
996       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
997     }
998   } else {
999     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1000     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1001   }
1002   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1003 
1004   /* load up the local column values */
1005   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1006   cnt  = 0;
1007   for (i=0; i<mat->rmap->n; i++) {
1008     for (j=B->i[i]; j<B->i[i+1]; j++) {
1009       if ( garray[B->j[j]] > cstart) break;
1010       column_values[cnt++] = B->a[j];
1011     }
1012     for (k=A->i[i]; k<A->i[i+1]; k++) {
1013       column_values[cnt++] = A->a[k];
1014     }
1015     for (; j<B->i[i+1]; j++) {
1016       column_values[cnt++] = B->a[j];
1017     }
1018   }
1019   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1020 
1021   /* store the column values to the file */
1022   if (!rank) {
1023     MPI_Status status;
1024     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1025     for (i=1; i<size; i++) {
1026       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1028       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1029       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1030     }
1031   } else {
1032     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1033     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1034   }
1035   ierr = PetscFree(column_values);CHKERRQ(ierr);
1036   PetscFunctionReturn(0);
1037 }
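/*
   On-disk layout produced above (the standard PETSc binary matrix format, read
   back by MatLoad()), written entirely by process 0:

      header[4]       = { MAT_FILE_COOKIE, M, N, total number of nonzeros }
      rowlengths[M]     number of nonzeros in each global row
      colindices[nz]    global column indices, row by row, increasing within a row
      values[nz]        the numerical values in the same order

   Off-process pieces are shipped to process 0 with MPI_Send/MPI_Recv before
   being written.
*/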
1038 
1039 #undef __FUNCT__
1040 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1041 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1042 {
1043   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1044   PetscErrorCode    ierr;
1045   PetscMPIInt       rank = aij->rank,size = aij->size;
1046   PetscTruth        isdraw,iascii,isbinary;
1047   PetscViewer       sviewer;
1048   PetscViewerFormat format;
1049 
1050   PetscFunctionBegin;
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1052   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1053   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1054   if (iascii) {
1055     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1056     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1057       MatInfo    info;
1058       PetscTruth inodes;
1059 
1060       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1061       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1062       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1063       if (!inodes) {
1064         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1065 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1066       } else {
1067         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1068 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1069       }
1070       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1074       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1075       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1076       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1077       PetscFunctionReturn(0);
1078     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1079       PetscInt   inodecount,inodelimit,*inodes;
1080       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1081       if (inodes) {
1082         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1083       } else {
1084         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1085       }
1086       PetscFunctionReturn(0);
1087     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1088       PetscFunctionReturn(0);
1089     }
1090   } else if (isbinary) {
1091     if (size == 1) {
1092       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1093       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1094     } else {
1095       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1096     }
1097     PetscFunctionReturn(0);
1098   } else if (isdraw) {
1099     PetscDraw  draw;
1100     PetscTruth isnull;
1101     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1102     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1103   }
1104 
1105   if (size == 1) {
1106     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1107     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1108   } else {
1109     /* assemble the entire matrix onto first processor. */
1110     Mat         A;
1111     Mat_SeqAIJ  *Aloc;
1112     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1113     MatScalar   *a;
1114 
1115     if (mat->rmap->N > 1024) {
1116       PetscTruth flg = PETSC_FALSE;
1117 
1118       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1119       if (!flg) {
1120         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1121       }
1122     }
1123 
1124     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1125     if (!rank) {
1126       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1127     } else {
1128       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1129     }
1130     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1131     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1132     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1133     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1134 
1135     /* copy over the A part */
1136     Aloc = (Mat_SeqAIJ*)aij->A->data;
1137     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1138     row = mat->rmap->rstart;
1139     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1140     for (i=0; i<m; i++) {
1141       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1142       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1143     }
1144     aj = Aloc->j;
1145     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1146 
1147     /* copy over the B part */
1148     Aloc = (Mat_SeqAIJ*)aij->B->data;
1149     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1150     row  = mat->rmap->rstart;
1151     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1152     ct   = cols;
1153     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1154     for (i=0; i<m; i++) {
1155       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1156       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1157     }
1158     ierr = PetscFree(ct);CHKERRQ(ierr);
1159     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1160     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1161     /*
1162        Everyone has to call this code to draw the matrix since the graphics waits are
1163        synchronized across all processes that share the PetscDraw object
1164     */
1165     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1166     if (!rank) {
1167       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1168       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1169     }
1170     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1171     ierr = MatDestroy(A);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatView_MPIAIJ"
1178 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1179 {
1180   PetscErrorCode ierr;
1181   PetscTruth     iascii,isdraw,issocket,isbinary;
1182 
1183   PetscFunctionBegin;
1184   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1185   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1186   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1187   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1188   if (iascii || isdraw || isbinary || issocket) {
1189     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1190   } else {
1191     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1192   }
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatRelax_MPIAIJ"
1198 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1199 {
1200   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1201   PetscErrorCode ierr;
1202   Vec            bb1 = 0;
1203 
1204   PetscFunctionBegin;
1205   if (its > 1) {ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);}
1206 
1207   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1208     if (flag & SOR_ZERO_INITIAL_GUESS) {
1209       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1210       its--;
1211     }
1212 
1213     while (its--) {
1214       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1215       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1216 
1217       /* update rhs: bb1 = bb - B*x */
1218       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1219       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1220 
1221       /* local sweep */
1222       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1223     }
1224   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1225     if (flag & SOR_ZERO_INITIAL_GUESS) {
1226       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1227       its--;
1228     }
1229     while (its--) {
1230       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1231       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232 
1233       /* update rhs: bb1 = bb - B*x */
1234       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1235       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1236 
1237       /* local sweep */
1238       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1239     }
1240   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1241     if (flag & SOR_ZERO_INITIAL_GUESS) {
1242       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1243       its--;
1244     }
1245     while (its--) {
1246       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1247       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1248 
1249       /* update rhs: bb1 = bb - B*x */
1250       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1251       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1252 
1253       /* local sweep */
1254       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1255     }
1256   }  else if (flag & SOR_EISENSTAT) {
1257     Vec         xx1;
1258 
1259     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1260     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1261     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1262 
1263     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265     if (!mat->diag) {
1266       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1267       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1268     }
1269     ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1270     ierr = VecWAXPY(bb1,-1.0,bb1,bb);CHKERRQ(ierr);
1271     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1272 
1273     /* local sweep */
1274     ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1275     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1276     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1277   } else {
1278     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1279   }
1280 
1281   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1282   PetscFunctionReturn(0);
1283 }
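/*
   Each "local sweep" above is block Jacobi with respect to the off-process
   coupling: the ghost values of the current iterate are gathered into
   mat->lvec, the right-hand side is corrected as

        bb1 = bb - A_o * x_ghost

   (implemented as VecScale(mat->lvec,-1.0) followed by MatMultAdd with bb), and
   a sequential SOR sweep is then applied to the diagonal block with bb1.
*/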
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatPermute_MPIAIJ"
1287 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1288 {
1289   MPI_Comm       comm,pcomm;
1290   PetscInt       first,local_size,nrows;
1291   const PetscInt *rows;
1292   PetscMPIInt    size;
1293   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1294   PetscErrorCode ierr;
1295 
1296   PetscFunctionBegin;
1297   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1298   /* make a collective version of 'rowp' */
1299   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1300   if (pcomm==comm) {
1301     crowp = rowp;
1302   } else {
1303     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1304     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1305     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1306     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1307   }
1308   /* collect the global row permutation and invert it */
1309   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1310   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1311   if (pcomm!=comm) {
1312     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1313   }
1314   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1315   /* get the local target indices */
1316   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1317   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1318   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1319   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1320   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1321   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1322   /* the column permutation is so much easier;
1323      make a local version of 'colp' and invert it */
1324   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1326   if (size==1) {
1327     lcolp = colp;
1328   } else {
1329     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1330     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1331     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1332   }
1333   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1334   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1335   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1336   if (size>1) {
1337     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1338     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1339   }
1340   /* now we just get the submatrix */
1341   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1342   /* clean up */
1343   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1344   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1345   PetscFunctionReturn(0);
1346 }
1347 
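/*
   Usage sketch (illustrative only, with assumptions): MatPermute() takes a
   (possibly distributed) row permutation and a per-process column permutation
   listing all global columns.  The identity strides below are placeholders
   for a real permutation; A is an assembled MPIAIJ matrix.

     Mat      B;
     IS       rowp,colp;
     PetscInt rstart,rend,N;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
     ierr = ISDestroy(rowp);CHKERRQ(ierr);
     ierr = ISDestroy(colp);CHKERRQ(ierr);
*/
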
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1350 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1351 {
1352   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1353   Mat            A = mat->A,B = mat->B;
1354   PetscErrorCode ierr;
1355   PetscReal      isend[5],irecv[5];
1356 
1357   PetscFunctionBegin;
1358   info->block_size     = 1.0;
1359   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1360   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1361   isend[3] = info->memory;  isend[4] = info->mallocs;
1362   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1363   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1364   isend[3] += info->memory;  isend[4] += info->mallocs;
1365   if (flag == MAT_LOCAL) {
1366     info->nz_used      = isend[0];
1367     info->nz_allocated = isend[1];
1368     info->nz_unneeded  = isend[2];
1369     info->memory       = isend[3];
1370     info->mallocs      = isend[4];
1371   } else if (flag == MAT_GLOBAL_MAX) {
1372     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1373     info->nz_used      = irecv[0];
1374     info->nz_allocated = irecv[1];
1375     info->nz_unneeded  = irecv[2];
1376     info->memory       = irecv[3];
1377     info->mallocs      = irecv[4];
1378   } else if (flag == MAT_GLOBAL_SUM) {
1379     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1380     info->nz_used      = irecv[0];
1381     info->nz_allocated = irecv[1];
1382     info->nz_unneeded  = irecv[2];
1383     info->memory       = irecv[3];
1384     info->mallocs      = irecv[4];
1385   }
1386   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1387   info->fill_ratio_needed = 0;
1388   info->factor_mallocs    = 0;
1389 
1390   PetscFunctionReturn(0);
1391 }
1392 
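/*
   Usage sketch (illustrative only): querying the assembled parallel matrix
   for nonzero statistics; MAT_LOCAL, MAT_GLOBAL_MAX and MAT_GLOBAL_SUM select
   the reductions implemented above.

     MatInfo info;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g, allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/
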
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatSetOption_MPIAIJ"
1395 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1396 {
1397   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1398   PetscErrorCode ierr;
1399 
1400   PetscFunctionBegin;
1401   switch (op) {
1402   case MAT_NEW_NONZERO_LOCATIONS:
1403   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1404   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1405   case MAT_KEEP_NONZERO_PATTERN:
1406   case MAT_NEW_NONZERO_LOCATION_ERR:
1407   case MAT_USE_INODES:
1408   case MAT_IGNORE_ZERO_ENTRIES:
1409     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1410     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1411     break;
1412   case MAT_ROW_ORIENTED:
1413     a->roworiented = flg;
1414     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1415     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1416     break;
1417   case MAT_NEW_DIAGONALS:
1418     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1419     break;
1420   case MAT_IGNORE_OFF_PROC_ENTRIES:
1421     a->donotstash = PETSC_TRUE;
1422     break;
1423   case MAT_SYMMETRIC:
1424     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1425     break;
1426   case MAT_STRUCTURALLY_SYMMETRIC:
1427   case MAT_HERMITIAN:
1428   case MAT_SYMMETRY_ETERNAL:
1429     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1430     break;
1431   default:
1432     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1433   }
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatGetRow_MPIAIJ"
1439 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1440 {
1441   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1442   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1443   PetscErrorCode ierr;
1444   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1445   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1446   PetscInt       *cmap,*idx_p;
1447 
1448   PetscFunctionBegin;
1449   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1450   mat->getrowactive = PETSC_TRUE;
1451 
1452   if (!mat->rowvalues && (idx || v)) {
1453     /*
1454         allocate enough space to hold information from the longest row.
1455     */
1456     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1457     PetscInt     max = 1,tmp;
1458     for (i=0; i<matin->rmap->n; i++) {
1459       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1460       if (max < tmp) { max = tmp; }
1461     }
1462     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1463     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1464   }
1465 
1466   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1467   lrow = row - rstart;
1468 
1469   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1470   if (!v)   {pvA = 0; pvB = 0;}
1471   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1472   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1473   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1474   nztot = nzA + nzB;
1475 
1476   cmap  = mat->garray;
1477   if (v  || idx) {
1478     if (nztot) {
1479       /* Sort by increasing column numbers, assuming A and B already sorted */
1480       PetscInt imark = -1;
1481       if (v) {
1482         *v = v_p = mat->rowvalues;
1483         for (i=0; i<nzB; i++) {
1484           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1485           else break;
1486         }
1487         imark = i;
1488         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1489         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1490       }
1491       if (idx) {
1492         *idx = idx_p = mat->rowindices;
1493         if (imark > -1) {
1494           for (i=0; i<imark; i++) {
1495             idx_p[i] = cmap[cworkB[i]];
1496           }
1497         } else {
1498           for (i=0; i<nzB; i++) {
1499             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1500             else break;
1501           }
1502           imark = i;
1503         }
1504         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1505         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1506       }
1507     } else {
1508       if (idx) *idx = 0;
1509       if (v)   *v   = 0;
1510     }
1511   }
1512   *nz = nztot;
1513   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1514   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1520 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1521 {
1522   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1523 
1524   PetscFunctionBegin;
1525   if (!aij->getrowactive) {
1526     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1527   }
1528   aij->getrowactive = PETSC_FALSE;
1529   PetscFunctionReturn(0);
1530 }
1531 
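/*
   Usage sketch (illustrative only): MatGetRow()/MatRestoreRow() may only be
   called for locally owned rows and for one row at a time, which is what the
   getrowactive flag above enforces.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[0..ncols-1] and vals[0..ncols-1] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
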
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatNorm_MPIAIJ"
1534 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1535 {
1536   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1537   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1538   PetscErrorCode ierr;
1539   PetscInt       i,j,cstart = mat->cmap->rstart;
1540   PetscReal      sum = 0.0;
1541   MatScalar      *v;
1542 
1543   PetscFunctionBegin;
1544   if (aij->size == 1) {
1545     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1546   } else {
1547     if (type == NORM_FROBENIUS) {
1548       v = amat->a;
1549       for (i=0; i<amat->nz; i++) {
1550 #if defined(PETSC_USE_COMPLEX)
1551         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1552 #else
1553         sum += (*v)*(*v); v++;
1554 #endif
1555       }
1556       v = bmat->a;
1557       for (i=0; i<bmat->nz; i++) {
1558 #if defined(PETSC_USE_COMPLEX)
1559         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1560 #else
1561         sum += (*v)*(*v); v++;
1562 #endif
1563       }
1564       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1565       *norm = sqrt(*norm);
1566     } else if (type == NORM_1) { /* max column norm */
1567       PetscReal *tmp,*tmp2;
1568       PetscInt  *jj,*garray = aij->garray;
1569       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1570       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1571       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1572       *norm = 0.0;
1573       v = amat->a; jj = amat->j;
1574       for (j=0; j<amat->nz; j++) {
1575         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1576       }
1577       v = bmat->a; jj = bmat->j;
1578       for (j=0; j<bmat->nz; j++) {
1579         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1580       }
1581       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1582       for (j=0; j<mat->cmap->N; j++) {
1583         if (tmp2[j] > *norm) *norm = tmp2[j];
1584       }
1585       ierr = PetscFree(tmp);CHKERRQ(ierr);
1586       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1587     } else if (type == NORM_INFINITY) { /* max row norm */
1588       PetscReal ntemp = 0.0;
1589       for (j=0; j<aij->A->rmap->n; j++) {
1590         v = amat->a + amat->i[j];
1591         sum = 0.0;
1592         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1593           sum += PetscAbsScalar(*v); v++;
1594         }
1595         v = bmat->a + bmat->i[j];
1596         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1597           sum += PetscAbsScalar(*v); v++;
1598         }
1599         if (sum > ntemp) ntemp = sum;
1600       }
1601       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1602     } else {
1603       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1604     }
1605   }
1606   PetscFunctionReturn(0);
1607 }
1608 
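/*
   Usage sketch (illustrative only): only NORM_1, NORM_FROBENIUS and
   NORM_INFINITY are supported in parallel; NORM_2 is refused above.

     PetscReal nrm1,nrmf,nrminf;

     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr);
*/
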
1609 #undef __FUNCT__
1610 #define __FUNCT__ "MatTranspose_MPIAIJ"
1611 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1612 {
1613   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1614   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1615   PetscErrorCode ierr;
1616   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1617   PetscInt       cstart=A->cmap->rstart,ncol;
1618   Mat            B;
1619   MatScalar      *array;
1620 
1621   PetscFunctionBegin;
1622   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1623 
1624   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1625   ai = Aloc->i; aj = Aloc->j;
1626   bi = Bloc->i; bj = Bloc->j;
1627   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1628     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1629     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1630     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1631     for (i=0; i<ai[ma]; i++){
1632       d_nnz[aj[i]] ++;
1633       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1634     }
1635 
1636     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1637     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1638     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1639     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1640     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1641   } else {
1642     B = *matout;
1643   }
1644 
1645   /* copy over the A part */
1646   array = Aloc->a;
1647   row = A->rmap->rstart;
1648   for (i=0; i<ma; i++) {
1649     ncol = ai[i+1]-ai[i];
1650     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1651     row++; array += ncol; aj += ncol;
1652   }
1653   aj = Aloc->j;
1654   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1655 
1656   /* copy over the B part */
1657   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1658   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1659   array = Bloc->a;
1660   row = A->rmap->rstart;
1661   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1662   cols_tmp = cols;
1663   for (i=0; i<mb; i++) {
1664     ncol = bi[i+1]-bi[i];
1665     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1666     row++; array += ncol; cols_tmp += ncol;
1667   }
1668   ierr = PetscFree(cols);CHKERRQ(ierr);
1669 
1670   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1673     *matout = B;
1674   } else {
1675     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1676   }
1677   PetscFunctionReturn(0);
1678 }
1679 
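/*
   Usage sketch (illustrative only): out-of-place transpose allocates a new
   parallel matrix with the preallocation estimated above; the in-place form
   (MAT_REUSE_MATRIX with the matrix itself as output) is only permitted for
   square matrices, as checked at the top of the routine.

     Mat At;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ... use At ...
     ierr = MatDestroy(At);CHKERRQ(ierr);
*/
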
1680 #undef __FUNCT__
1681 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1682 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1683 {
1684   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1685   Mat            a = aij->A,b = aij->B;
1686   PetscErrorCode ierr;
1687   PetscInt       s1,s2,s3;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1691   if (rr) {
1692     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1693     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1694     /* Overlap communication with computation. */
1695     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696   }
1697   if (ll) {
1698     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1699     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1700     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1701   }
1702   /* scale  the diagonal block */
1703   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1704 
1705   if (rr) {
1706     /* Do a scatter end and then right scale the off-diagonal block */
1707     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1708     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1709   }
1710 
1711   PetscFunctionReturn(0);
1712 }
1713 
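/*
   Usage sketch (illustrative only): the left vector must match the row layout
   and the right vector the column layout; either may be PETSC_NULL.  The
   scatter of rr into lvec is what lets the off-diagonal block be scaled
   without further communication.

     Vec l,r;

     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(l);CHKERRQ(ierr);
     ierr = VecDestroy(r);CHKERRQ(ierr);
*/
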
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1716 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1717 {
1718   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1719   PetscErrorCode ierr;
1720 
1721   PetscFunctionBegin;
1722   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1723   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1724   PetscFunctionReturn(0);
1725 }
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1728 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1729 {
1730   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatEqual_MPIAIJ"
1740 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1741 {
1742   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1743   Mat            a,b,c,d;
1744   PetscTruth     flg;
1745   PetscErrorCode ierr;
1746 
1747   PetscFunctionBegin;
1748   a = matA->A; b = matA->B;
1749   c = matB->A; d = matB->B;
1750 
1751   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1752   if (flg) {
1753     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1754   }
1755   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatCopy_MPIAIJ"
1761 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1762 {
1763   PetscErrorCode ierr;
1764   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1765   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1766 
1767   PetscFunctionBegin;
1768   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1769   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1770     /* Because of the column compression in the off-processor part of the matrix a->B,
1771        the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
1772        directly on the two parts. If needed, a copy more efficient than MatCopy_Basic()
1773        could be provided by first uncompressing the a->B matrices and then copying the
1774        submatrices. */
1775     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1776   } else {
1777     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1778     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1779   }
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 #undef __FUNCT__
1784 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1785 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1786 {
1787   PetscErrorCode ierr;
1788 
1789   PetscFunctionBegin;
1790   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 #include "petscblaslapack.h"
1795 #undef __FUNCT__
1796 #define __FUNCT__ "MatAXPY_MPIAIJ"
1797 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1798 {
1799   PetscErrorCode ierr;
1800   PetscInt       i;
1801   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1802   PetscBLASInt   bnz,one=1;
1803   Mat_SeqAIJ     *x,*y;
1804 
1805   PetscFunctionBegin;
1806   if (str == SAME_NONZERO_PATTERN) {
1807     PetscScalar alpha = a;
1808     x = (Mat_SeqAIJ *)xx->A->data;
1809     y = (Mat_SeqAIJ *)yy->A->data;
1810     bnz = PetscBLASIntCast(x->nz);
1811     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1812     x = (Mat_SeqAIJ *)xx->B->data;
1813     y = (Mat_SeqAIJ *)yy->B->data;
1814     bnz = PetscBLASIntCast(x->nz);
1815     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1816   } else if (str == SUBSET_NONZERO_PATTERN) {
1817     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1818 
1819     x = (Mat_SeqAIJ *)xx->B->data;
1820     y = (Mat_SeqAIJ *)yy->B->data;
1821     if (y->xtoy && y->XtoY != xx->B) {
1822       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1823       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1824     }
1825     if (!y->xtoy) { /* get xtoy */
1826       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1827       y->XtoY = xx->B;
1828       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1829     }
1830     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1831   } else {
1832     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1833   }
1834   PetscFunctionReturn(0);
1835 }
1836 
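/*
   Usage sketch (illustrative only): Y <- a*X + Y.  SAME_NONZERO_PATTERN takes
   the direct BLAS axpy path above, SUBSET_NONZERO_PATTERN builds and caches
   the x-to-y index map, and any other structure falls back to MatAXPY_Basic().

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
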
1837 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatConjugate_MPIAIJ"
1841 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1842 {
1843 #if defined(PETSC_USE_COMPLEX)
1844   PetscErrorCode ierr;
1845   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1846 
1847   PetscFunctionBegin;
1848   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1849   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1850 #else
1851   PetscFunctionBegin;
1852 #endif
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 #undef __FUNCT__
1857 #define __FUNCT__ "MatRealPart_MPIAIJ"
1858 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1859 {
1860   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1861   PetscErrorCode ierr;
1862 
1863   PetscFunctionBegin;
1864   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1865   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1871 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1872 {
1873   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1874   PetscErrorCode ierr;
1875 
1876   PetscFunctionBegin;
1877   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1878   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 #ifdef PETSC_HAVE_PBGL
1883 
1884 #include <boost/parallel/mpi/bsp_process_group.hpp>
1885 #include <boost/graph/distributed/ilu_default_graph.hpp>
1886 #include <boost/graph/distributed/ilu_0_block.hpp>
1887 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1888 #include <boost/graph/distributed/petsc/interface.hpp>
1889 #include <boost/multi_array.hpp>
1890 #include <boost/parallel/distributed_property_map.hpp>
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1894 /*
1895   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1896 */
1897 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1898 {
1899   namespace petsc = boost::distributed::petsc;
1900 
1901   namespace graph_dist = boost::graph::distributed;
1902   using boost::graph::distributed::ilu_default::process_group_type;
1903   using boost::graph::ilu_permuted;
1904 
1905   PetscTruth      row_identity, col_identity;
1906   PetscContainer  c;
1907   PetscInt        m, n, M, N;
1908   PetscErrorCode  ierr;
1909 
1910   PetscFunctionBegin;
1911   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1912   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1913   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1914   if (!row_identity || !col_identity) {
1915     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1916   }
1917 
1918   process_group_type pg;
1919   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1920   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1921   lgraph_type&   level_graph = *lgraph_p;
1922   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1923 
1924   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1925   ilu_permuted(level_graph);
1926 
1927   /* put together the new matrix */
1928   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1929   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1930   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1931   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1932   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1933   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1934   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1935 
1936   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1937   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1938   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1944 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1945 {
1946   PetscFunctionBegin;
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatSolve_MPIAIJ"
1952 /*
1953   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1954 */
1955 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1956 {
1957   namespace graph_dist = boost::graph::distributed;
1958 
1959   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1960   lgraph_type*   lgraph_p;
1961   PetscContainer c;
1962   PetscErrorCode ierr;
1963 
1964   PetscFunctionBegin;
1965   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1966   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1967   ierr = VecCopy(b, x);CHKERRQ(ierr);
1968 
1969   PetscScalar* array_x;
1970   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1971   PetscInt sx;
1972   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1973 
1974   PetscScalar* array_b;
1975   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1976   PetscInt sb;
1977   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1978 
1979   lgraph_type&   level_graph = *lgraph_p;
1980   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1981 
1982   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1983   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1984                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1985 
1986   typedef boost::iterator_property_map<array_ref_type::iterator,
1987                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1988   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1989                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1990 
1991   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1992 
1993   PetscFunctionReturn(0);
1994 }
1995 #endif
1996 
1997 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1998   PetscInt       nzlocal,nsends,nrecvs;
1999   PetscMPIInt    *send_rank;
2000   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
2001   PetscScalar    *sbuf_a,**rbuf_a;
2002   PetscErrorCode (*MatDestroy)(Mat);
2003 } Mat_Redundant;
2004 
2005 #undef __FUNCT__
2006 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2007 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2008 {
2009   PetscErrorCode       ierr;
2010   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2011   PetscInt             i;
2012 
2013   PetscFunctionBegin;
2014   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2015   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2016   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2017   for (i=0; i<redund->nrecvs; i++){
2018     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2019     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2020   }
2021   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2022   ierr = PetscFree(redund);CHKERRQ(ierr);
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "MatDestroy_MatRedundant"
2028 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2029 {
2030   PetscErrorCode  ierr;
2031   PetscContainer  container;
2032   Mat_Redundant   *redund=PETSC_NULL;
2033 
2034   PetscFunctionBegin;
2035   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2036   if (container) {
2037     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2038   } else {
2039     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2040   }
2041   A->ops->destroy = redund->MatDestroy;
2042   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2043   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2044   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2045   PetscFunctionReturn(0);
2046 }
2047 
2048 #undef __FUNCT__
2049 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2050 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2051 {
2052   PetscMPIInt    rank,size;
2053   MPI_Comm       comm=((PetscObject)mat)->comm;
2054   PetscErrorCode ierr;
2055   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2056   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2057   PetscInt       *rowrange=mat->rmap->range;
2058   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2059   Mat            A=aij->A,B=aij->B,C=*matredundant;
2060   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2061   PetscScalar    *sbuf_a;
2062   PetscInt       nzlocal=a->nz+b->nz;
2063   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2064   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2065   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2066   MatScalar      *aworkA,*aworkB;
2067   PetscScalar    *vals;
2068   PetscMPIInt    tag1,tag2,tag3,imdex;
2069   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2070                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2071   MPI_Status     recv_status,*send_status;
2072   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2073   PetscInt       **rbuf_j=PETSC_NULL;
2074   PetscScalar    **rbuf_a=PETSC_NULL;
2075   Mat_Redundant  *redund=PETSC_NULL;
2076   PetscContainer container;
2077 
2078   PetscFunctionBegin;
2079   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2080   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2081 
2082   if (reuse == MAT_REUSE_MATRIX) {
2083     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2084     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2085     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2086     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2087     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2088     if (container) {
2089       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2090     } else {
2091       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2092     }
2093     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2094 
2095     nsends    = redund->nsends;
2096     nrecvs    = redund->nrecvs;
2097     send_rank = redund->send_rank; recv_rank = send_rank + size;
2098     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2099     sbuf_j    = redund->sbuf_j;
2100     sbuf_a    = redund->sbuf_a;
2101     rbuf_j    = redund->rbuf_j;
2102     rbuf_a    = redund->rbuf_a;
2103   }
2104 
2105   if (reuse == MAT_INITIAL_MATRIX){
2106     PetscMPIInt  subrank,subsize;
2107     PetscInt     nleftover,np_subcomm;
2108     /* get the destination processors' id send_rank, nsends and nrecvs */
2109     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2110     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2111     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2112     recv_rank = send_rank + size;
2113     np_subcomm = size/nsubcomm;
2114     nleftover  = size - nsubcomm*np_subcomm;
2115     nsends = 0; nrecvs = 0;
2116     for (i=0; i<size; i++){ /* i=rank*/
2117       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2118         send_rank[nsends] = i; nsends++;
2119         recv_rank[nrecvs++] = i;
2120       }
2121     }
2122     if (rank >= size - nleftover){/* this proc is a leftover processor */
2123       i = size-nleftover-1;
2124       j = 0;
2125       while (j < nsubcomm - nleftover){
2126         send_rank[nsends++] = i;
2127         i--; j++;
2128       }
2129     }
2130 
2131     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2132       for (i=0; i<nleftover; i++){
2133         recv_rank[nrecvs++] = size-nleftover+i;
2134       }
2135     }
2136 
2137     /* allocate sbuf_j, sbuf_a */
2138     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2139     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2140     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2141   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2142 
2143   /* copy mat's local entries into the buffers */
2144   if (reuse == MAT_INITIAL_MATRIX){
2145     rownz_max = 0;
2146     rptr = sbuf_j;
2147     cols = sbuf_j + rend-rstart + 1;
2148     vals = sbuf_a;
2149     rptr[0] = 0;
2150     for (i=0; i<rend-rstart; i++){
2151       row = i + rstart;
2152       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2153       ncols  = nzA + nzB;
2154       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2155       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2156       /* load the column indices for this row into cols */
2157       lwrite = 0;
2158       for (l=0; l<nzB; l++) {
2159         if ((ctmp = bmap[cworkB[l]]) < cstart){
2160           vals[lwrite]   = aworkB[l];
2161           cols[lwrite++] = ctmp;
2162         }
2163       }
2164       for (l=0; l<nzA; l++){
2165         vals[lwrite]   = aworkA[l];
2166         cols[lwrite++] = cstart + cworkA[l];
2167       }
2168       for (l=0; l<nzB; l++) {
2169         if ((ctmp = bmap[cworkB[l]]) >= cend){
2170           vals[lwrite]   = aworkB[l];
2171           cols[lwrite++] = ctmp;
2172         }
2173       }
2174       vals += ncols;
2175       cols += ncols;
2176       rptr[i+1] = rptr[i] + ncols;
2177       if (rownz_max < ncols) rownz_max = ncols;
2178     }
2179     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2180   } else { /* only copy matrix values into sbuf_a */
2181     rptr = sbuf_j;
2182     vals = sbuf_a;
2183     rptr[0] = 0;
2184     for (i=0; i<rend-rstart; i++){
2185       row = i + rstart;
2186       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2187       ncols  = nzA + nzB;
2188       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2189       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2190       lwrite = 0;
2191       for (l=0; l<nzB; l++) {
2192         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2193       }
2194       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2195       for (l=0; l<nzB; l++) {
2196         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2197       }
2198       vals += ncols;
2199       rptr[i+1] = rptr[i] + ncols;
2200     }
2201   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2202 
2203   /* send nzlocal to others, and recv others' nzlocal */
2204   /*--------------------------------------------------*/
2205   if (reuse == MAT_INITIAL_MATRIX){
2206     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2207     s_waits2 = s_waits3 + nsends;
2208     s_waits1 = s_waits2 + nsends;
2209     r_waits1 = s_waits1 + nsends;
2210     r_waits2 = r_waits1 + nrecvs;
2211     r_waits3 = r_waits2 + nrecvs;
2212   } else {
2213     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2214     r_waits3 = s_waits3 + nsends;
2215   }
2216 
2217   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2218   if (reuse == MAT_INITIAL_MATRIX){
2219     /* get new tags to keep the communication clean */
2220     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2221     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2222     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2223     rbuf_nz = sbuf_nz + nsends;
2224 
2225     /* post receives of other's nzlocal */
2226     for (i=0; i<nrecvs; i++){
2227       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2228     }
2229     /* send nzlocal to others */
2230     for (i=0; i<nsends; i++){
2231       sbuf_nz[i] = nzlocal;
2232       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2233     }
2234     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2235     count = nrecvs;
2236     while (count) {
2237       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2238       recv_rank[imdex] = recv_status.MPI_SOURCE;
2239       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2240       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2241 
2242       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2243       rbuf_nz[imdex] += i + 2;
2244       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2245       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2246       count--;
2247     }
2248     /* wait on sends of nzlocal */
2249     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2250     /* send mat->i,j to others, and recv from others */
2251     /*------------------------------------------------*/
2252     for (i=0; i<nsends; i++){
2253       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2254       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2255     }
2256     /* wait on receives of mat->i,j */
2257     /*------------------------------*/
2258     count = nrecvs;
2259     while (count) {
2260       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2261       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2262       count--;
2263     }
2264     /* wait on sends of mat->i,j */
2265     /*---------------------------*/
2266     if (nsends) {
2267       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2268     }
2269   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2270 
2271   /* post receives, send and receive mat->a */
2272   /*----------------------------------------*/
2273   for (imdex=0; imdex<nrecvs; imdex++) {
2274     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2275   }
2276   for (i=0; i<nsends; i++){
2277     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2278   }
2279   count = nrecvs;
2280   while (count) {
2281     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2282     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2283     count--;
2284   }
2285   if (nsends) {
2286     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2287   }
2288 
2289   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2290 
2291   /* create redundant matrix */
2292   /*-------------------------*/
2293   if (reuse == MAT_INITIAL_MATRIX){
2294     /* compute rownz_max for preallocation */
2295     for (imdex=0; imdex<nrecvs; imdex++){
2296       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2297       rptr = rbuf_j[imdex];
2298       for (i=0; i<j; i++){
2299         ncols = rptr[i+1] - rptr[i];
2300         if (rownz_max < ncols) rownz_max = ncols;
2301       }
2302     }
2303 
2304     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2305     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2306     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2307     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2308     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2309   } else {
2310     C = *matredundant;
2311   }
2312 
2313   /* insert local matrix entries */
2314   rptr = sbuf_j;
2315   cols = sbuf_j + rend-rstart + 1;
2316   vals = sbuf_a;
2317   for (i=0; i<rend-rstart; i++){
2318     row   = i + rstart;
2319     ncols = rptr[i+1] - rptr[i];
2320     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2321     vals += ncols;
2322     cols += ncols;
2323   }
2324   /* insert received matrix entries */
2325   for (imdex=0; imdex<nrecvs; imdex++){
2326     rstart = rowrange[recv_rank[imdex]];
2327     rend   = rowrange[recv_rank[imdex]+1];
2328     rptr = rbuf_j[imdex];
2329     cols = rbuf_j[imdex] + rend-rstart + 1;
2330     vals = rbuf_a[imdex];
2331     for (i=0; i<rend-rstart; i++){
2332       row   = i + rstart;
2333       ncols = rptr[i+1] - rptr[i];
2334       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2335       vals += ncols;
2336       cols += ncols;
2337     }
2338   }
2339   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2342   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2343   if (reuse == MAT_INITIAL_MATRIX){
2344     PetscContainer container;
2345     *matredundant = C;
2346     /* create a supporting struct and attach it to C for reuse */
2347     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2348     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2349     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2350     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2351     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2352 
2353     redund->nzlocal = nzlocal;
2354     redund->nsends  = nsends;
2355     redund->nrecvs  = nrecvs;
2356     redund->send_rank = send_rank;
2357     redund->sbuf_nz = sbuf_nz;
2358     redund->sbuf_j  = sbuf_j;
2359     redund->sbuf_a  = sbuf_a;
2360     redund->rbuf_j  = rbuf_j;
2361     redund->rbuf_a  = rbuf_a;
2362 
2363     redund->MatDestroy = C->ops->destroy;
2364     C->ops->destroy    = MatDestroy_MatRedundant;
2365   }
2366   PetscFunctionReturn(0);
2367 }
2368 
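/*
   Usage sketch (illustrative only, with assumptions): the caller supplies the
   sub-communicators; the grouping below (one subcommunicator per process, so
   every process receives a full sequential copy) matches the round-robin rank
   mapping assumed by the routine above.  For fewer subcommunicators the local
   sizes passed as mlocal_sub must add up to the global size on each subcomm.

     Mat         Cred;
     MPI_Comm    subcomm;
     PetscMPIInt rank,size;
     PetscInt    M;

     ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
     ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
     ierr = MatGetSize(A,&M,PETSC_NULL);CHKERRQ(ierr);
     ierr = MPI_Comm_split(((PetscObject)A)->comm,rank,rank,&subcomm);CHKERRQ(ierr);
     ierr = MatGetRedundantMatrix(A,size,subcomm,M,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
*/
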
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2371 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2372 {
2373   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2374   PetscErrorCode ierr;
2375   PetscInt       i,*idxb = 0;
2376   PetscScalar    *va,*vb;
2377   Vec            vtmp;
2378 
2379   PetscFunctionBegin;
2380   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2381   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2382   if (idx) {
2383     for (i=0; i<A->rmap->n; i++) {
2384       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2385     }
2386   }
2387 
2388   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2389   if (idx) {
2390     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2391   }
2392   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2393   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2394 
2395   for (i=0; i<A->rmap->n; i++){
2396     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2397       va[i] = vb[i];
2398       if (idx) idx[i] = a->garray[idxb[i]];
2399     }
2400   }
2401 
2402   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2403   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2404   if (idxb) {
2405     ierr = PetscFree(idxb);CHKERRQ(ierr);
2406   }
2407   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2408   PetscFunctionReturn(0);
2409 }
2410 
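/*
   Usage sketch (illustrative only): v must have the matrix's row layout; the
   optional idx[] array returns, for each local row, the global column of the
   entry of largest magnitude.

     Vec      rmax;
     PetscInt m,*loc;

     ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetVecs(A,PETSC_NULL,&rmax);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr);
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(rmax);CHKERRQ(ierr);
*/
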
2411 #undef __FUNCT__
2412 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2413 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2414 {
2415   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2416   PetscErrorCode ierr;
2417   PetscInt       i,*idxb = 0;
2418   PetscScalar    *va,*vb;
2419   Vec            vtmp;
2420 
2421   PetscFunctionBegin;
2422   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2423   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2424   if (idx) {
2425     for (i=0; i<A->rmap->n; i++) {
2426       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2427     }
2428   }
2429 
2430   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2431   if (idx) {
2432     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2433   }
2434   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2435   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2436 
2437   for (i=0; i<A->rmap->n; i++){
2438     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2439       va[i] = vb[i];
2440       if (idx) idx[i] = a->garray[idxb[i]];
2441     }
2442   }
2443 
2444   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2445   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2446   if (idxb) {
2447     ierr = PetscFree(idxb);CHKERRQ(ierr);
2448   }
2449   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2455 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2456 {
2457   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2458   PetscInt       n      = A->rmap->n;
2459   PetscInt       cstart = A->cmap->rstart;
2460   PetscInt      *cmap   = mat->garray;
2461   PetscInt      *diagIdx, *offdiagIdx;
2462   Vec            diagV, offdiagV;
2463   PetscScalar   *a, *diagA, *offdiagA;
2464   PetscInt       r;
2465   PetscErrorCode ierr;
2466 
2467   PetscFunctionBegin;
2468   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2469   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2470   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2471   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2472   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2473   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2474   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2475   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2476   for(r = 0; r < n; ++r) {
2477     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2478       a[r]   = diagA[r];
2479       idx[r] = cstart + diagIdx[r];
2480     } else {
2481       a[r]   = offdiagA[r];
2482       idx[r] = cmap[offdiagIdx[r]];
2483     }
2484   }
2485   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2486   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2487   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2488   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2489   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2490   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 #undef __FUNCT__
2495 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2496 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2497 {
2498   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2499   PetscInt       n      = A->rmap->n;
2500   PetscInt       cstart = A->cmap->rstart;
2501   PetscInt      *cmap   = mat->garray;
2502   PetscInt      *diagIdx, *offdiagIdx;
2503   Vec            diagV, offdiagV;
2504   PetscScalar   *a, *diagA, *offdiagA;
2505   PetscInt       r;
2506   PetscErrorCode ierr;
2507 
2508   PetscFunctionBegin;
2509   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2510   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2511   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2512   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2513   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2514   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2515   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2516   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2517   for(r = 0; r < n; ++r) {
2518     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2519       a[r]   = diagA[r];
2520       idx[r] = cstart + diagIdx[r];
2521     } else {
2522       a[r]   = offdiagA[r];
2523       idx[r] = cmap[offdiagIdx[r]];
2524     }
2525   }
2526   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2527   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2528   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2529   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2530   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2531   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 #undef __FUNCT__
2536 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2537 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2538 {
2539   PetscErrorCode ierr;
2540 
2541   PetscFunctionBegin;
2542   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 /* -------------------------------------------------------------------*/
2547 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2548        MatGetRow_MPIAIJ,
2549        MatRestoreRow_MPIAIJ,
2550        MatMult_MPIAIJ,
2551 /* 4*/ MatMultAdd_MPIAIJ,
2552        MatMultTranspose_MPIAIJ,
2553        MatMultTransposeAdd_MPIAIJ,
2554 #ifdef PETSC_HAVE_PBGL
2555        MatSolve_MPIAIJ,
2556 #else
2557        0,
2558 #endif
2559        0,
2560        0,
2561 /*10*/ 0,
2562        0,
2563        0,
2564        MatRelax_MPIAIJ,
2565        MatTranspose_MPIAIJ,
2566 /*15*/ MatGetInfo_MPIAIJ,
2567        MatEqual_MPIAIJ,
2568        MatGetDiagonal_MPIAIJ,
2569        MatDiagonalScale_MPIAIJ,
2570        MatNorm_MPIAIJ,
2571 /*20*/ MatAssemblyBegin_MPIAIJ,
2572        MatAssemblyEnd_MPIAIJ,
2573        MatSetOption_MPIAIJ,
2574        MatZeroEntries_MPIAIJ,
2575 /*24*/ MatZeroRows_MPIAIJ,
2576        0,
2577 #ifdef PETSC_HAVE_PBGL
2578        0,
2579 #else
2580        0,
2581 #endif
2582        0,
2583        0,
2584 /*29*/ MatSetUpPreallocation_MPIAIJ,
2585 #ifdef PETSC_HAVE_PBGL
2586        0,
2587 #else
2588        0,
2589 #endif
2590        0,
2591        0,
2592        0,
2593 /*34*/ MatDuplicate_MPIAIJ,
2594        0,
2595        0,
2596        0,
2597        0,
2598 /*39*/ MatAXPY_MPIAIJ,
2599        MatGetSubMatrices_MPIAIJ,
2600        MatIncreaseOverlap_MPIAIJ,
2601        MatGetValues_MPIAIJ,
2602        MatCopy_MPIAIJ,
2603 /*44*/ MatGetRowMax_MPIAIJ,
2604        MatScale_MPIAIJ,
2605        0,
2606        0,
2607        0,
2608 /*49*/ MatSetBlockSize_MPIAIJ,
2609        0,
2610        0,
2611        0,
2612        0,
2613 /*54*/ MatFDColoringCreate_MPIAIJ,
2614        0,
2615        MatSetUnfactored_MPIAIJ,
2616        MatPermute_MPIAIJ,
2617        0,
2618 /*59*/ MatGetSubMatrix_MPIAIJ,
2619        MatDestroy_MPIAIJ,
2620        MatView_MPIAIJ,
2621        0,
2622        0,
2623 /*64*/ 0,
2624        0,
2625        0,
2626        0,
2627        0,
2628 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2629        MatGetRowMinAbs_MPIAIJ,
2630        0,
2631        MatSetColoring_MPIAIJ,
2632 #if defined(PETSC_HAVE_ADIC)
2633        MatSetValuesAdic_MPIAIJ,
2634 #else
2635        0,
2636 #endif
2637        MatSetValuesAdifor_MPIAIJ,
2638 /*75*/ 0,
2639        0,
2640        0,
2641        0,
2642        0,
2643 /*80*/ 0,
2644        0,
2645        0,
2646 /*83*/ MatLoad_MPIAIJ,
2647        0,
2648        0,
2649        0,
2650        0,
2651        0,
2652 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2653        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2654        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2655        MatPtAP_Basic,
2656        MatPtAPSymbolic_MPIAIJ,
2657 /*94*/ MatPtAPNumeric_MPIAIJ,
2658        0,
2659        0,
2660        0,
2661        0,
2662 /*99*/ 0,
2663        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2664        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2665        MatConjugate_MPIAIJ,
2666        0,
2667 /*104*/MatSetValuesRow_MPIAIJ,
2668        MatRealPart_MPIAIJ,
2669        MatImaginaryPart_MPIAIJ,
2670        0,
2671        0,
2672 /*109*/0,
2673        MatGetRedundantMatrix_MPIAIJ,
2674        MatGetRowMin_MPIAIJ,
2675        0,
2676        0,
2677 /*114*/MatGetSeqNonzerostructure_MPIAIJ};
2678 
2679 /* ----------------------------------------------------------------------------------------*/
2680 
2681 EXTERN_C_BEGIN
2682 #undef __FUNCT__
2683 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2684 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2685 {
2686   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2687   PetscErrorCode ierr;
2688 
2689   PetscFunctionBegin;
2690   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2691   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2692   PetscFunctionReturn(0);
2693 }
2694 EXTERN_C_END
2695 
2696 EXTERN_C_BEGIN
2697 #undef __FUNCT__
2698 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2699 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2700 {
2701   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2702   PetscErrorCode ierr;
2703 
2704   PetscFunctionBegin;
2705   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2706   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2707   PetscFunctionReturn(0);
2708 }
2709 EXTERN_C_END
2710 
2711 #include "petscpc.h"
2712 EXTERN_C_BEGIN
2713 #undef __FUNCT__
2714 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2715 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2716 {
2717   Mat_MPIAIJ     *b;
2718   PetscErrorCode ierr;
2719   PetscInt       i;
2720 
2721   PetscFunctionBegin;
2722   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2723   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2724   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2725   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2726 
2727   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2728   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2729   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2730   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2731   if (d_nnz) {
2732     for (i=0; i<B->rmap->n; i++) {
2733       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2734     }
2735   }
2736   if (o_nnz) {
2737     for (i=0; i<B->rmap->n; i++) {
2738       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2739     }
2740   }
2741   b = (Mat_MPIAIJ*)B->data;
2742 
2743   if (!B->preallocated) {
2744     /* Explicitly create 2 MATSEQAIJ matrices. */
2745     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2746     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2747     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2748     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2749     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2750     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2751     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2752     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2753   }
2754 
2755   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2756   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2757   B->preallocated = PETSC_TRUE;
2758   PetscFunctionReturn(0);
2759 }
2760 EXTERN_C_END
2761 
2762 #undef __FUNCT__
2763 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2764 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2765 {
2766   Mat            mat;
2767   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2768   PetscErrorCode ierr;
2769 
2770   PetscFunctionBegin;
2771   *newmat       = 0;
2772   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2773   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2774   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2775   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2776   a    = (Mat_MPIAIJ*)mat->data;
2777 
2778   mat->factor       = matin->factor;
2779   mat->rmap->bs      = matin->rmap->bs;
2780   mat->assembled    = PETSC_TRUE;
2781   mat->insertmode   = NOT_SET_VALUES;
2782   mat->preallocated = PETSC_TRUE;
2783 
2784   a->size           = oldmat->size;
2785   a->rank           = oldmat->rank;
2786   a->donotstash     = oldmat->donotstash;
2787   a->roworiented    = oldmat->roworiented;
2788   a->rowindices     = 0;
2789   a->rowvalues      = 0;
2790   a->getrowactive   = PETSC_FALSE;
2791 
2792   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2793   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2794 
2795   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2796   if (oldmat->colmap) {
2797 #if defined (PETSC_USE_CTABLE)
2798     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2799 #else
2800     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2801     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2802     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2803 #endif
2804   } else a->colmap = 0;
2805   if (oldmat->garray) {
2806     PetscInt len;
2807     len  = oldmat->B->cmap->n;
2808     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2809     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2810     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2811   } else a->garray = 0;
2812 
2813   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2814   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2815   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2816   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2817   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2818   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2819   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2820   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2821   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2822   *newmat = mat;
2823   PetscFunctionReturn(0);
2824 }
2825 
2826 #include "petscsys.h"
2827 
2828 #undef __FUNCT__
2829 #define __FUNCT__ "MatLoad_MPIAIJ"
2830 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2831 {
2832   Mat            A;
2833   PetscScalar    *vals,*svals;
2834   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2835   MPI_Status     status;
2836   PetscErrorCode ierr;
2837   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2838   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2839   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2840   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2841   PetscInt       cend,cstart,n,*rowners;
2842   int            fd;
2843 
2844   PetscFunctionBegin;
2845   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2846   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2847   if (!rank) {
2848     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2849     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2850     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2851   }
2852 
2853   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2854   M = header[1]; N = header[2];
2855   /* determine ownership of all rows */
2856   m    = M/size + ((M % size) > rank);
2857   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2858   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2859 
2860   /* First process needs enough room for process with most rows */
2861   if (!rank) {
2862     mmax       = rowners[1];
2863     for (i=2; i<size; i++) {
2864       mmax = PetscMax(mmax,rowners[i]);
2865     }
2866   } else mmax = m;
2867 
2868   rowners[0] = 0;
2869   for (i=2; i<=size; i++) {
2870     rowners[i] += rowners[i-1];
2871   }
2872   rstart = rowners[rank];
2873   rend   = rowners[rank+1];
2874 
2875   /* distribute row lengths to all processors */
2876   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2877   if (!rank) {
2878     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2879     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2880     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2881     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2882     for (j=0; j<m; j++) {
2883       procsnz[0] += ourlens[j];
2884     }
2885     for (i=1; i<size; i++) {
2886       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2887       /* calculate the number of nonzeros on each processor */
2888       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2889         procsnz[i] += rowlengths[j];
2890       }
2891       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2892       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2893     }
2894     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2895   } else {
2896     mpicnt = PetscMPIIntCast(m);
2897     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2898   }
2899 
2900   if (!rank) {
2901     /* determine max buffer needed and allocate it */
2902     maxnz = 0;
2903     for (i=0; i<size; i++) {
2904       maxnz = PetscMax(maxnz,procsnz[i]);
2905     }
2906     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2907 
2908     /* read in my part of the matrix column indices  */
2909     nz   = procsnz[0];
2910     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2911     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2912 
2913     /* read in everyone else's and ship it off */
2914     for (i=1; i<size; i++) {
2915       nz     = procsnz[i];
2916       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2917       mpicnt = PetscMPIIntCast(nz);
2918       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2919     }
2920     ierr = PetscFree(cols);CHKERRQ(ierr);
2921   } else {
2922     /* determine buffer space needed for message */
2923     nz = 0;
2924     for (i=0; i<m; i++) {
2925       nz += ourlens[i];
2926     }
2927     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2928 
2929     /* receive message of column indices */
2930     mpicnt = PetscMPIIntCast(nz);
2931     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2932     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2933     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2934     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2935     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2936   }
2937 
2938   /* determine column ownership if matrix is not square */
2939   if (N != M) {
2940     n      = N/size + ((N % size) > rank);
2941     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2942     cstart = cend - n;
2943   } else {
2944     cstart = rstart;
2945     cend   = rend;
2946     n      = cend - cstart;
2947   }
2948 
2949   /* loop over local rows, determining number of off diagonal entries */
2950   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2951   jj = 0;
2952   for (i=0; i<m; i++) {
2953     for (j=0; j<ourlens[i]; j++) {
2954       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2955       jj++;
2956     }
2957   }
2958 
2959   /* create our matrix */
2960   for (i=0; i<m; i++) {
2961     ourlens[i] -= offlens[i];
2962   }
2963   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2964   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2965   ierr = MatSetType(A,type);CHKERRQ(ierr);
2966   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2967 
2968   for (i=0; i<m; i++) {
2969     ourlens[i] += offlens[i];
2970   }
2971 
2972   if (!rank) {
2973     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2974 
2975     /* read in my part of the matrix numerical values  */
2976     nz   = procsnz[0];
2977     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2978 
2979     /* insert into matrix */
2980     jj      = rstart;
2981     smycols = mycols;
2982     svals   = vals;
2983     for (i=0; i<m; i++) {
2984       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2985       smycols += ourlens[i];
2986       svals   += ourlens[i];
2987       jj++;
2988     }
2989 
2990     /* read in other processors and ship out */
2991     for (i=1; i<size; i++) {
2992       nz     = procsnz[i];
2993       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2994       mpicnt = PetscMPIIntCast(nz);
2995       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2996     }
2997     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2998   } else {
2999     /* receive numeric values */
3000     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3001 
3002     /* receive message of values*/
3003     mpicnt = PetscMPIIntCast(nz);
3004     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3005     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3006     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3007     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3008     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3009 
3010     /* insert into matrix */
3011     jj      = rstart;
3012     smycols = mycols;
3013     svals   = vals;
3014     for (i=0; i<m; i++) {
3015       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3016       smycols += ourlens[i];
3017       svals   += ourlens[i];
3018       jj++;
3019     }
3020   }
3021   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3022   ierr = PetscFree(vals);CHKERRQ(ierr);
3023   ierr = PetscFree(mycols);CHKERRQ(ierr);
3024   ierr = PetscFree(rowners);CHKERRQ(ierr);
3025 
3026   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3027   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3028   *newmat = A;
3029   PetscFunctionReturn(0);
3030 }
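
/*
   Illustrative sketch (not part of the library source): MatLoad_MPIAIJ() is normally reached
   through the generic MatLoad() interface after opening a binary viewer for reading, e.g.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   Here "matrix.dat" is a placeholder file name; rank 0 reads the file and ships each process
   its block of rows, as the implementation above shows.
*/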
3031 
3032 #undef __FUNCT__
3033 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3034 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3035 {
3036   PetscErrorCode ierr;
3037   IS             iscol_local;
3038   PetscInt       csize;
3039 
3040   PetscFunctionBegin;
3041   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3042   if (call == MAT_REUSE_MATRIX) {
3043     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3044     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3045   } else {
3046     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3047   }
3048   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3049   if (call == MAT_INITIAL_MATRIX) {
3050     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3051     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3052   }
3053   PetscFunctionReturn(0);
3054 }
3055 
3056 #undef __FUNCT__
3057 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3058 /*
3059     Not great since it makes two copies of the submatrix: first a SeqAIJ
3060   matrix locally, and then the end result by concatenating the local matrices.
3061   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3062 
3063   Note: This requires a sequential iscol with all indices.
3064 */
3065 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3066 {
3067   PetscErrorCode ierr;
3068   PetscMPIInt    rank,size;
3069   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3070   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3071   Mat            *local,M,Mreuse;
3072   MatScalar      *vwork,*aa;
3073   MPI_Comm       comm = ((PetscObject)mat)->comm;
3074   Mat_SeqAIJ     *aij;
3075 
3076 
3077   PetscFunctionBegin;
3078   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3079   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3080 
3081   if (call ==  MAT_REUSE_MATRIX) {
3082     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3083     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3084     local = &Mreuse;
3085     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3086   } else {
3087     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3088     Mreuse = *local;
3089     ierr   = PetscFree(local);CHKERRQ(ierr);
3090   }
3091 
3092   /*
3093       m - number of local rows
3094       n - number of columns (same on all processors)
3095       rstart - first row in new global matrix generated
3096   */
3097   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3098   if (call == MAT_INITIAL_MATRIX) {
3099     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3100     ii  = aij->i;
3101     jj  = aij->j;
3102 
3103     /*
3104         Determine the number of non-zeros in the diagonal and off-diagonal
3105         portions of the matrix in order to do correct preallocation
3106     */
3107 
3108     /* first get start and end of "diagonal" columns */
3109     if (csize == PETSC_DECIDE) {
3110       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3111       if (mglobal == n) { /* square matrix */
3112 	nlocal = m;
3113       } else {
3114         nlocal = n/size + ((n % size) > rank);
3115       }
3116     } else {
3117       nlocal = csize;
3118     }
3119     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3120     rstart = rend - nlocal;
3121     if (rank == size - 1 && rend != n) {
3122       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3123     }
3124 
3125     /* next, compute all the lengths */
3126     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3127     olens = dlens + m;
3128     for (i=0; i<m; i++) {
3129       jend = ii[i+1] - ii[i];
3130       olen = 0;
3131       dlen = 0;
3132       for (j=0; j<jend; j++) {
3133         if (*jj < rstart || *jj >= rend) olen++;
3134         else dlen++;
3135         jj++;
3136       }
3137       olens[i] = olen;
3138       dlens[i] = dlen;
3139     }
3140     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3141     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3142     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3143     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3144     ierr = PetscFree(dlens);CHKERRQ(ierr);
3145   } else {
3146     PetscInt ml,nl;
3147 
3148     M = *newmat;
3149     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3150     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3151     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3152     /*
3153          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3154        rather than the slower MatSetValues().
3155     */
3156     M->was_assembled = PETSC_TRUE;
3157     M->assembled     = PETSC_FALSE;
3158   }
3159   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3160   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3161   ii  = aij->i;
3162   jj  = aij->j;
3163   aa  = aij->a;
3164   for (i=0; i<m; i++) {
3165     row   = rstart + i;
3166     nz    = ii[i+1] - ii[i];
3167     cwork = jj;     jj += nz;
3168     vwork = aa;     aa += nz;
3169     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3170   }
3171 
3172   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3173   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3174   *newmat = M;
3175 
3176   /* save submatrix used in processor for next request */
3177   if (call ==  MAT_INITIAL_MATRIX) {
3178     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3179     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3180   }
3181 
3182   PetscFunctionReturn(0);
3183 }
3184 
3185 EXTERN_C_BEGIN
3186 #undef __FUNCT__
3187 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3188 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3189 {
3190   PetscInt       m,cstart, cend,j,nnz,i,d;
3191   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3192   const PetscInt *JJ;
3193   PetscScalar    *values;
3194   PetscErrorCode ierr;
3195 
3196   PetscFunctionBegin;
3197   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3198 
3199   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3200   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3201   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3202   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3203   m      = B->rmap->n;
3204   cstart = B->cmap->rstart;
3205   cend   = B->cmap->rend;
3206   rstart = B->rmap->rstart;
3207 
3208   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3209   o_nnz = d_nnz + m;
3210 
3211 #if defined(PETSC_USE_DEBUGGING)
3212   for (i=0; i<m; i++) {
3213     nnz     = Ii[i+1]- Ii[i];
3214     JJ      = J + Ii[i];
3215     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3216     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3217     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3218     for (j=1; j<nnz; j++) {
3219       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3220     }
3221   }
3222 #endif
3223 
3224   for (i=0; i<m; i++) {
3225     nnz     = Ii[i+1]- Ii[i];
3226     JJ      = J + Ii[i];
3227     nnz_max = PetscMax(nnz_max,nnz);
3228     for (j=0; j<nnz; j++) {
3229       if (*JJ >= cstart) break;
3230       JJ++;
3231     }
3232     d = 0;
3233     for (; j<nnz; j++) {
3234       if (*JJ++ >= cend) break;
3235       d++;
3236     }
3237     d_nnz[i] = d;
3238     o_nnz[i] = nnz - d;
3239   }
3240   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3241   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3242 
3243   if (v) values = (PetscScalar*)v;
3244   else {
3245     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3246     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3247   }
3248 
3249   for (i=0; i<m; i++) {
3250     ii   = i + rstart;
3251     nnz  = Ii[i+1]- Ii[i];
3252     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3253   }
3254   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3255   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3256 
3257   if (!v) {
3258     ierr = PetscFree(values);CHKERRQ(ierr);
3259   }
3260   PetscFunctionReturn(0);
3261 }
3262 EXTERN_C_END
3263 
3264 #undef __FUNCT__
3265 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3266 /*@
3267    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3268    (the default parallel PETSc format).
3269 
3270    Collective on MPI_Comm
3271 
3272    Input Parameters:
3273 +  B - the matrix
3274 .  i - the indices into j for the start of each local row (starts with zero)
3275 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
3276 -  v - optional values in the matrix
3277 
3278    Level: developer
3279 
3280    Notes:
3281        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3282      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3283      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3284 
3285        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3286 
3287        The format used for the sparse matrix input is equivalent to a
3288     row-major ordering, i.e. for the following matrix the input data expected is
3289     as shown:
3290 
3291         1 0 0
3292         2 0 3     P0
3293        -------
3294         4 5 6     P1
3295 
3296      Process0 [P0]: rows_owned=[0,1]
3297         i =  {0,1,3}  [size = nrow+1  = 2+1]
3298         j =  {0,0,2}  [size = nz = 3]
3299         v =  {1,2,3}  [size = nz = 3]
3300 
3301      Process1 [P1]: rows_owned=[2]
3302         i =  {0,3}    [size = nrow+1  = 1+1]
3303         j =  {0,1,2}  [size = nz = 3]
3304         v =  {4,5,6}  [size = nz = 3]
3305 
3306       The column indices for each row MUST be sorted.
3307 
3308 .keywords: matrix, aij, compressed row, sparse, parallel
3309 
3310 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3311           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3312 @*/
3313 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3314 {
3315   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3316 
3317   PetscFunctionBegin;
3318   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3319   if (f) {
3320     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3321   }
3322   PetscFunctionReturn(0);
3323 }
3324 
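/*
   Illustrative sketch (not part of the library source): assembling the 3x3 example from the
   manual page above on two processes; rank is the result of MPI_Comm_rank(), and the i/j/v
   arrays are exactly those listed for P0 and P1.

     Mat         B;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
     PetscScalar v0[] = {1.0,2.0,3.0};
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};
     PetscScalar v1[] = {4.0,5.0,6.0};

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);CHKERRQ(ierr);

   No separate MatAssemblyBegin()/MatAssemblyEnd() is required afterwards; the routine above
   assembles B itself.
*/
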
3325 #undef __FUNCT__
3326 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3327 /*@C
3328    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3329    (the default parallel PETSc format).  For good matrix assembly performance
3330    the user should preallocate the matrix storage by setting the parameters
3331    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3332    performance can be increased by more than a factor of 50.
3333 
3334    Collective on MPI_Comm
3335 
3336    Input Parameters:
3337 +  A - the matrix
3338 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3339            (same value is used for all local rows)
3340 .  d_nnz - array containing the number of nonzeros in the various rows of the
3341            DIAGONAL portion of the local submatrix (possibly different for each row)
3342            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3343            The size of this array is equal to the number of local rows, i.e 'm'.
3344            You must leave room for the diagonal entry even if it is zero.
3345 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3346            submatrix (same value is used for all local rows).
3347 -  o_nnz - array containing the number of nonzeros in the various rows of the
3348            OFF-DIAGONAL portion of the local submatrix (possibly different for
3349            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3350            structure. The size of this array is equal to the number
3351            of local rows, i.e 'm'.
3352 
3353    If the *_nnz parameter is given then the *_nz parameter is ignored
3354 
3355    The AIJ format (also called the Yale sparse matrix format or
3356    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3357    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3358 
3359    The parallel matrix is partitioned such that the first m0 rows belong to
3360    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3361    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3362 
3363    The DIAGONAL portion of the local submatrix of a processor can be defined
3364    as the submatrix which is obtained by extracting the part corresponding
3365    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3366    first row that belongs to the processor, and r2 is the last row belonging
3367    to this processor. This is a square mxm matrix. The remaining portion
3368    of the local submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3369 
3370    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3371 
3372    You can call MatGetInfo() to get information on how effective the preallocation was;
3373    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3374    You can also run with the option -info and look for messages with the string
3375    malloc in them to see if additional memory allocation was needed.
3376 
3377    Example usage:
3378 
3379    Consider the following 8x8 matrix with 34 non-zero values, that is
3380    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3381    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3382    as follows:
3383 
3384 .vb
3385             1  2  0  |  0  3  0  |  0  4
3386     Proc0   0  5  6  |  7  0  0  |  8  0
3387             9  0 10  | 11  0  0  | 12  0
3388     -------------------------------------
3389            13  0 14  | 15 16 17  |  0  0
3390     Proc1   0 18  0  | 19 20 21  |  0  0
3391             0  0  0  | 22 23  0  | 24  0
3392     -------------------------------------
3393     Proc2  25 26 27  |  0  0 28  | 29  0
3394            30  0  0  | 31 32 33  |  0 34
3395 .ve
3396 
3397    This can be represented as a collection of submatrices as:
3398 
3399 .vb
3400       A B C
3401       D E F
3402       G H I
3403 .ve
3404 
3405    Where the submatrices A,B,C are owned by proc0, D,E,F are
3406    owned by proc1, G,H,I are owned by proc2.
3407 
3408    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3409    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3410    The 'M','N' parameters are 8,8, and have the same values on all procs.
3411 
3412    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3413    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3414    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3415    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3416    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3417    matrix, and [DF] as another SeqAIJ matrix.
3418 
3419    When d_nz, o_nz parameters are specified, d_nz storage elements are
3420    allocated for every row of the local diagonal submatrix, and o_nz
3421    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3422    One way to choose d_nz and o_nz is to use the max nonzeros per local
3423    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3424    In this case, the values of d_nz,o_nz are:
3425 .vb
3426      proc0 : d_nz = 2, o_nz = 2
3427      proc1 : d_nz = 3, o_nz = 2
3428      proc2 : d_nz = 1, o_nz = 4
3429 .ve
3430    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3431    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3432    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3433    34 values.
3434 
3435    When d_nnz, o_nnz parameters are specified, the storage is specified
3436    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3437    In the above case the values for d_nnz,o_nnz are:
3438 .vb
3439      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3440      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3441      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3442 .ve
3443    Here the space allocated is the sum of all the above values, i.e. 34, and
3444    hence the preallocation is perfect.
3445 
3446    Level: intermediate
3447 
3448 .keywords: matrix, aij, compressed row, sparse, parallel
3449 
3450 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3451           MPIAIJ, MatGetInfo()
3452 @*/
3453 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3454 {
3455   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3456 
3457   PetscFunctionBegin;
3458   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3459   if (f) {
3460     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3461   }
3462   PetscFunctionReturn(0);
3463 }
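
/*
   Illustrative sketch (not part of the library source): the preallocation calls made by proc1
   for the 8x8 example on the manual page above, using the per-row d_nnz/o_nnz arrays
   (proc1 owns rows 3..5 and "diagonal" columns 3..5).

     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   The other processes make the analogous calls with their own local sizes and arrays; the
   scalar d_nz/o_nz values (here 0) are ignored whenever the arrays are provided.
*/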
3464 
3465 #undef __FUNCT__
3466 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3467 /*@
3468      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3469          CSR format the local rows.
3470 
3471    Collective on MPI_Comm
3472 
3473    Input Parameters:
3474 +  comm - MPI communicator
3475 .  m - number of local rows (Cannot be PETSC_DECIDE)
3476 .  n - This value should be the same as the local size used in creating the
3477        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3478        calculated if N is given) For square matrices n is almost always m.
3479 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3480 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3481 .   i - row indices
3482 .   j - column indices
3483 -   a - matrix values
3484 
3485    Output Parameter:
3486 .   mat - the matrix
3487 
3488    Level: intermediate
3489 
3490    Notes:
3491        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3492      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3493      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3494 
3495        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3496 
3497        The format used for the sparse matrix input is equivalent to a
3498     row-major ordering, i.e. for the following matrix the input data expected is
3499     as shown:
3500 
3501         1 0 0
3502         2 0 3     P0
3503        -------
3504         4 5 6     P1
3505 
3506      Process0 [P0]: rows_owned=[0,1]
3507         i =  {0,1,3}  [size = nrow+1  = 2+1]
3508         j =  {0,0,2}  [size = nz = 3]
3509         v =  {1,2,3}  [size = nz = 3]
3510 
3511      Process1 [P1]: rows_owned=[2]
3512         i =  {0,3}    [size = nrow+1  = 1+1]
3513         j =  {0,1,2}  [size = nz = 3]
3514         v =  {4,5,6}  [size = nz = 3]
3515 
3516 .keywords: matrix, aij, compressed row, sparse, parallel
3517 
3518 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3519           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3520 @*/
3521 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3522 {
3523   PetscErrorCode ierr;
3524 
3525   PetscFunctionBegin;
3526   if (i[0]) {
3527     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3528   }
3529   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3530   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3531   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3532   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3533   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3534   PetscFunctionReturn(0);
3535 }
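
/*
   Illustrative sketch (not part of the library source): this routine is a convenience wrapper
   around MatCreate()/MatSetSizes()/MatSetType()/MatMPIAIJSetPreallocationCSR(), so the
   two-process example on the manual page above reduces to a single call, e.g. on process 0
   (process 1 passes m = 1 and its own local i/j/v arrays):

     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&B);CHKERRQ(ierr);
*/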
3536 
3537 #undef __FUNCT__
3538 #define __FUNCT__ "MatCreateMPIAIJ"
3539 /*@C
3540    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3541    (the default parallel PETSc format).  For good matrix assembly performance
3542    the user should preallocate the matrix storage by setting the parameters
3543    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3544    performance can be increased by more than a factor of 50.
3545 
3546    Collective on MPI_Comm
3547 
3548    Input Parameters:
3549 +  comm - MPI communicator
3550 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3551            This value should be the same as the local size used in creating the
3552            y vector for the matrix-vector product y = Ax.
3553 .  n - This value should be the same as the local size used in creating the
3554        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3555        calculated if N is given) For square matrices n is almost always m.
3556 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3557 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3558 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3559            (same value is used for all local rows)
3560 .  d_nnz - array containing the number of nonzeros in the various rows of the
3561            DIAGONAL portion of the local submatrix (possibly different for each row)
3562            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3563            The size of this array is equal to the number of local rows, i.e 'm'.
3564            You must leave room for the diagonal entry even if it is zero.
3565 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3566            submatrix (same value is used for all local rows).
3567 -  o_nnz - array containing the number of nonzeros in the various rows of the
3568            OFF-DIAGONAL portion of the local submatrix (possibly different for
3569            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3570            structure. The size of this array is equal to the number
3571            of local rows, i.e 'm'.
3572 
3573    Output Parameter:
3574 .  A - the matrix
3575 
3576    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3577    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3578    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3579 
3580    Notes:
3581    If the *_nnz parameter is given then the *_nz parameter is ignored
3582 
3583    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3584    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3585    storage requirements for this matrix.
3586 
3587    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3588    processor then it must be used on all processors that share the object for
3589    that argument.
3590 
3591    The user MUST specify either the local or global matrix dimensions
3592    (possibly both).
3593 
3594    The parallel matrix is partitioned across processors such that the
3595    first m0 rows belong to process 0, the next m1 rows belong to
3596    process 1, the next m2 rows belong to process 2, etc., where
3597    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3598    values corresponding to an [m x N] submatrix.
3599 
3600    The columns are logically partitioned with the n0 columns belonging
3601    to the 0th partition, the next n1 columns belonging to the next
3602    partition, etc., where n0,n1,n2... are the input parameter 'n'.
3603 
3604    The DIAGONAL portion of the local submatrix on any given processor
3605    is the submatrix corresponding to the rows and columns m,n
3606    owned by the given processor, i.e. the diagonal matrix on
3607    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3608    etc. The remaining portion of the local submatrix [m x (N-n)]
3609    constitute the OFF-DIAGONAL portion. The example below better
3610    illustrates this concept.
3611 
3612    For a square global matrix we define each processor's diagonal portion
3613    to be its local rows and the corresponding columns (a square submatrix);
3614    each processor's off-diagonal portion encompasses the remainder of the
3615    local matrix (a rectangular submatrix).
3616 
3617    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3618 
3619    When calling this routine with a single process communicator, a matrix of
3620    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3621    type of communicator, use the construction mechanism:
3622      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3623 
3624    By default, this format uses inodes (identical nodes) when possible.
3625    We search for consecutive rows with the same nonzero structure, thereby
3626    reusing matrix information to achieve increased efficiency.
3627 
3628    Options Database Keys:
3629 +  -mat_no_inode  - Do not use inodes
3630 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3631 -  -mat_aij_oneindex - Internally use indexing starting at 1
3632         rather than 0.  Note that when calling MatSetValues(),
3633         the user still MUST index entries starting at 0!
3634 
3635 
3636    Example usage:
3637 
3638    Consider the following 8x8 matrix with 34 non-zero values, that is
3639    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3640    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3641    as follows:
3642 
3643 .vb
3644             1  2  0  |  0  3  0  |  0  4
3645     Proc0   0  5  6  |  7  0  0  |  8  0
3646             9  0 10  | 11  0  0  | 12  0
3647     -------------------------------------
3648            13  0 14  | 15 16 17  |  0  0
3649     Proc1   0 18  0  | 19 20 21  |  0  0
3650             0  0  0  | 22 23  0  | 24  0
3651     -------------------------------------
3652     Proc2  25 26 27  |  0  0 28  | 29  0
3653            30  0  0  | 31 32 33  |  0 34
3654 .ve
3655 
3656    This can be represented as a collection of submatrices as:
3657 
3658 .vb
3659       A B C
3660       D E F
3661       G H I
3662 .ve
3663 
3664    Where the submatrices A,B,C are owned by proc0, D,E,F are
3665    owned by proc1, G,H,I are owned by proc2.
3666 
3667    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3668    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3669    The 'M','N' parameters are 8,8, and have the same values on all procs.
3670 
3671    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3672    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3673    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3674    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3675    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3676    matrix, and [DF] as another SeqAIJ matrix.
3677 
3678    When d_nz, o_nz parameters are specified, d_nz storage elements are
3679    allocated for every row of the local diagonal submatrix, and o_nz
3680    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3681    One way to choose d_nz and o_nz is to use the max nonzeros per local
3682    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3683    In this case, the values of d_nz,o_nz are:
3684 .vb
3685      proc0 : d_nz = 2, o_nz = 2
3686      proc1 : d_nz = 3, o_nz = 2
3687      proc2 : d_nz = 1, o_nz = 4
3688 .ve
3689    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3690    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3691    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3692    34 values.
3693 
3694    When d_nnz, o_nnz parameters are specified, the storage is specified
3695    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3696    In the above case the values for d_nnz,o_nnz are:
3697 .vb
3698      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3699      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3700      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3701 .ve
3702    Here the space allocated is the sum of all the above values, i.e. 34, and
3703    hence the preallocation is perfect.
3704 
3705    Level: intermediate
3706 
3707 .keywords: matrix, aij, compressed row, sparse, parallel
3708 
3709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3710           MPIAIJ, MatCreateMPIAIJWithArrays()
3711 @*/
3712 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3713 {
3714   PetscErrorCode ierr;
3715   PetscMPIInt    size;
3716 
3717   PetscFunctionBegin;
3718   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3719   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3720   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3721   if (size > 1) {
3722     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3723     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3724   } else {
3725     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3726     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3727   }
3728   PetscFunctionReturn(0);
3729 }
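
/*
   Illustrative sketch (not part of the library source): creating the 8x8 example matrix from
   the manual page above with the single-call interface. Proc0 owns 3 rows and 3 "diagonal"
   columns and uses the scalar estimates d_nz = 2, o_nz = 2 worked out above:

     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);

   Each process passes its own m, n, d_nz and o_nz. As the code above shows, on a one-process
   communicator the call returns a SEQAIJ matrix instead.
*/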
3730 
3731 #undef __FUNCT__
3732 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3733 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3734 {
3735   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3736 
3737   PetscFunctionBegin;
3738   *Ad     = a->A;
3739   *Ao     = a->B;
3740   *colmap = a->garray;
3741   PetscFunctionReturn(0);
3742 }
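
/*
   Illustrative sketch (not part of the library source): accessing the two sequential blocks
   and the local-to-global column map of the off-diagonal block.

     Mat      Ad,Ao;
     PetscInt *garray;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   Here garray[k] gives the global column number of local column k of Ao. The routine returns
   references to the internal blocks (no copies are made), so the caller should not destroy
   Ad or Ao.
*/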
3743 
3744 #undef __FUNCT__
3745 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3746 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3747 {
3748   PetscErrorCode ierr;
3749   PetscInt       i;
3750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3751 
3752   PetscFunctionBegin;
3753   if (coloring->ctype == IS_COLORING_GLOBAL) {
3754     ISColoringValue *allcolors,*colors;
3755     ISColoring      ocoloring;
3756 
3757     /* set coloring for diagonal portion */
3758     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3759 
3760     /* set coloring for off-diagonal portion */
3761     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3762     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3763     for (i=0; i<a->B->cmap->n; i++) {
3764       colors[i] = allcolors[a->garray[i]];
3765     }
3766     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3767     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3768     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3769     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3770   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3771     ISColoringValue *colors;
3772     PetscInt        *larray;
3773     ISColoring      ocoloring;
3774 
3775     /* set coloring for diagonal portion */
3776     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3777     for (i=0; i<a->A->cmap->n; i++) {
3778       larray[i] = i + A->cmap->rstart;
3779     }
3780     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3781     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3782     for (i=0; i<a->A->cmap->n; i++) {
3783       colors[i] = coloring->colors[larray[i]];
3784     }
3785     ierr = PetscFree(larray);CHKERRQ(ierr);
3786     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3787     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3788     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3789 
3790     /* set coloring for off-diagonal portion */
3791     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3792     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3793     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3794     for (i=0; i<a->B->cmap->n; i++) {
3795       colors[i] = coloring->colors[larray[i]];
3796     }
3797     ierr = PetscFree(larray);CHKERRQ(ierr);
3798     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3799     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3800     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3801   } else {
3802     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3803   }
3804 
3805   PetscFunctionReturn(0);
3806 }
3807 
3808 #if defined(PETSC_HAVE_ADIC)
3809 #undef __FUNCT__
3810 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3811 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3812 {
3813   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3814   PetscErrorCode ierr;
3815 
3816   PetscFunctionBegin;
3817   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3818   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3819   PetscFunctionReturn(0);
3820 }
3821 #endif
3822 
3823 #undef __FUNCT__
3824 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3825 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3826 {
3827   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3828   PetscErrorCode ierr;
3829 
3830   PetscFunctionBegin;
3831   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3832   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3833   PetscFunctionReturn(0);
3834 }
3835 
3836 #undef __FUNCT__
3837 #define __FUNCT__ "MatMerge"
3838 /*@
3839       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3840                  matrices from each processor
3841 
3842     Collective on MPI_Comm
3843 
3844    Input Parameters:
3845 +    comm - the communicator the parallel matrix will live on
3846 .    inmat - the input sequential matrices
3847 .    n - number of local columns (or PETSC_DECIDE)
3848 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3849 
3850    Output Parameter:
3851 .    outmat - the parallel matrix generated
3852 
3853     Level: advanced
3854 
3855    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3856 
3857 @*/
3858 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3859 {
3860   PetscErrorCode ierr;
3861   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3862   PetscInt       *indx;
3863   PetscScalar    *values;
3864 
3865   PetscFunctionBegin;
3866   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3867   if (scall == MAT_INITIAL_MATRIX){
3868     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3869     if (n == PETSC_DECIDE){
3870       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3871     }
3872     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3873     rstart -= m;
3874 
3875     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3876     for (i=0;i<m;i++) {
3877       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3878       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3879       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3880     }
3881     /* This routine will ONLY return MPIAIJ type matrix */
3882     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3883     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3884     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3885     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3886     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3887 
3888   } else if (scall == MAT_REUSE_MATRIX){
3889     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3890   } else {
3891     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3892   }
3893 
3894   for (i=0;i<m;i++) {
3895     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3896     Ii    = i + rstart;
3897     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3898     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3899   }
3900   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3901   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3902   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3903 
3904   PetscFunctionReturn(0);
3905 }
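
/*
   Illustrative sketch (not part of the library source): gluing one sequential matrix per
   process into a parallel matrix. Each process first builds its own m x N SeqAIJ matrix
   seqmat (all with the same number of columns N); note that MatMerge() destroys the input
   matrix once its values have been copied over.

     Mat parmat;
     ierr = MatMerge(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parmat);CHKERRQ(ierr);
*/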
3906 
3907 #undef __FUNCT__
3908 #define __FUNCT__ "MatFileSplit"
3909 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3910 {
3911   PetscErrorCode    ierr;
3912   PetscMPIInt       rank;
3913   PetscInt          m,N,i,rstart,nnz;
3914   size_t            len;
3915   const PetscInt    *indx;
3916   PetscViewer       out;
3917   char              *name;
3918   Mat               B;
3919   const PetscScalar *values;
3920 
3921   PetscFunctionBegin;
3922   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3923   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3924   /* Should this be the type of the diagonal block of A? */
3925   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3926   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3927   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3928   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3929   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3930   for (i=0;i<m;i++) {
3931     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3932     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3933     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3934   }
3935   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3936   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3937 
3938   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3939   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3940   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3941   sprintf(name,"%s.%d",outfile,rank);
3942   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3943   ierr = PetscFree(name);CHKERRQ(ierr);
3944   ierr = MatView(B,out);CHKERRQ(ierr);
3945   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3946   ierr = MatDestroy(B);CHKERRQ(ierr);
3947   PetscFunctionReturn(0);
3948 }
3949 
3950 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3951 #undef __FUNCT__
3952 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3953 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3954 {
3955   PetscErrorCode       ierr;
3956   Mat_Merge_SeqsToMPI  *merge;
3957   PetscContainer       container;
3958 
3959   PetscFunctionBegin;
3960   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3961   if (container) {
3962     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3963     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3964     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3965     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3966     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3967     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3968     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3969     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3970     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3971     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3972     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3973     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3974 
3975     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3976     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3977   }
3978   ierr = PetscFree(merge);CHKERRQ(ierr);
3979 
3980   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3981   PetscFunctionReturn(0);
3982 }
3983 
3984 #include "../src/mat/utils/freespace.h"
3985 #include "petscbt.h"
3986 
3987 #undef __FUNCT__
3988 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3989 /*@C
3990       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3991                  matrices from each processor
3992 
3993     Collective on MPI_Comm
3994 
3995    Input Parameters:
3996 +    comm - the communicator the parallel matrix will live on
3997 .    seqmat - the input sequential matrices
3998 .    m - number of local rows (or PETSC_DECIDE)
3999 .    n - number of local columns (or PETSC_DECIDE)
4000 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4001 
4002    Output Parameter:
4003 .    mpimat - the parallel matrix generated
4004 
4005     Level: advanced
4006 
4007    Notes:
4008      The dimensions of the sequential matrix in each processor MUST be the same.
4009      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4010      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4011 @*/
4012 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4013 {
4014   PetscErrorCode       ierr;
4015   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4016   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4017   PetscMPIInt          size,rank,taga,*len_s;
4018   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4019   PetscInt             proc,m;
4020   PetscInt             **buf_ri,**buf_rj;
4021   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4022   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4023   MPI_Request          *s_waits,*r_waits;
4024   MPI_Status           *status;
4025   MatScalar            *aa=a->a;
4026   MatScalar            **abuf_r,*ba_i;
4027   Mat_Merge_SeqsToMPI  *merge;
4028   PetscContainer       container;
4029 
4030   PetscFunctionBegin;
4031   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4032 
4033   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4034   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4035 
4036   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4037   if (container) {
4038     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4039   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat was not created by MatMerge_SeqsToMPISymbolic()");
4040   bi     = merge->bi;
4041   bj     = merge->bj;
4042   buf_ri = merge->buf_ri;
4043   buf_rj = merge->buf_rj;
4044 
4045   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4046   owners = merge->rowmap.range;
4047   len_s  = merge->len_s;
4048 
4049   /* send and recv matrix values */
4050   /*-----------------------------*/
4051   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4052   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4053 
4054   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4055   for (proc=0,k=0; proc<size; proc++){
4056     if (!len_s[proc]) continue;
4057     i = owners[proc];
4058     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4059     k++;
4060   }
4061 
4062   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4063   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4064   ierr = PetscFree(status);CHKERRQ(ierr);
4065 
4066   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4067   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4068 
4069   /* insert mat values of mpimat */
4070   /*----------------------------*/
4071   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4072   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4073   nextrow = buf_ri_k + merge->nrecv;
4074   nextai  = nextrow + merge->nrecv;
4075 
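  /* Each received i-structure buf_ri[k] is packed by MatMerge_SeqsToMPISymbolic() as
        buf_ri[k][0]                    - number of rows in the message,
        buf_ri[k][1 .. nrows]           - row indices, local to this (receiving) process,
        buf_ri[k][nrows+1 .. 2*nrows+1] - CSR-style offsets into the matching buf_rj[k]/abuf_r[k] data;
     the three pointers set below walk these parts row by row. */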
4076   for (k=0; k<merge->nrecv; k++){
4077     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4078     nrows = *(buf_ri_k[k]);
4079     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4080     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure (offset) part of the k-th received message */
4081   }
4082 
4083   /* set values of ba */
4084   m = merge->rowmap.n;
4085   for (i=0; i<m; i++) {
4086     arow = owners[rank] + i;
4087     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4088     bnzi = bi[i+1] - bi[i];
4089     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4090 
4091     /* add local non-zero vals of this proc's seqmat into ba */
4092     anzi = ai[arow+1] - ai[arow];
4093     aj   = a->j + ai[arow];
4094     aa   = a->a + ai[arow];
4095     nextaj = 0;
4096     for (j=0; nextaj<anzi; j++){
4097       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4098         ba_i[j] += aa[nextaj++];
4099       }
4100     }
4101 
4102     /* add received vals into ba */
4103     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4104       /* i-th row */
4105       if (i == *nextrow[k]) {
4106         anzi = *(nextai[k]+1) - *nextai[k];
4107         aj   = buf_rj[k] + *(nextai[k]);
4108         aa   = abuf_r[k] + *(nextai[k]);
4109         nextaj = 0;
4110         for (j=0; nextaj<anzi; j++){
4111           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4112             ba_i[j] += aa[nextaj++];
4113           }
4114         }
4115         nextrow[k]++; nextai[k]++;
4116       }
4117     }
4118     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4119   }
4120   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4121   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4122 
4123   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4124   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4125   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4126   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4127   PetscFunctionReturn(0);
4128 }
4129 
4130 #undef __FUNCT__
4131 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4132 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4133 {
4134   PetscErrorCode       ierr;
4135   Mat                  B_mpi;
4136   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4137   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4138   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4139   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4140   PetscInt             len,proc,*dnz,*onz;
4141   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4142   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4143   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4144   MPI_Status           *status;
4145   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4146   PetscBT              lnkbt;
4147   Mat_Merge_SeqsToMPI  *merge;
4148   PetscContainer       container;
4149 
4150   PetscFunctionBegin;
4151   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4152 
4153   /* make sure it is a PETSc comm */
4154   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4155   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4156   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4157 
4158   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4159   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4160 
4161   /* determine row ownership */
4162   /*---------------------------------------------------------*/
4163   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4164   merge->rowmap.n = m;
4165   merge->rowmap.N = M;
4166   merge->rowmap.bs = 1;
4167   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4168   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4169   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4170 
4171   m      = merge->rowmap.n;
4172   M      = merge->rowmap.N;
4173   owners = merge->rowmap.range;
4174 
4175   /* determine the number of messages to send, their lengths */
4176   /*---------------------------------------------------------*/
4177   len_s  = merge->len_s;
4178 
4179   len = 0;  /* length of buf_si[] */
4180   merge->nsend = 0;
4181   for (proc=0; proc<size; proc++){
4182     len_si[proc] = 0;
4183     if (proc == rank){
4184       len_s[proc] = 0;
4185     } else {
4186       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4187       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4188     }
4189     if (len_s[proc]) {
4190       merge->nsend++;
4191       nrows = 0;
4192       for (i=owners[proc]; i<owners[proc+1]; i++){
4193         if (ai[i+1] > ai[i]) nrows++;
4194       }
4195       len_si[proc] = 2*(nrows+1);
4196       len += len_si[proc];
4197     }
4198   }
4199 
4200   /* determine the number and length of messages to receive for ij-structure */
4201   /*-------------------------------------------------------------------------*/
4202   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4203   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4204 
4205   /* post the Irecv of j-structure */
4206   /*-------------------------------*/
4207   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4208   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4209 
4210   /* post the Isend of j-structure */
4211   /*--------------------------------*/
4212   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4213   sj_waits = si_waits + merge->nsend;
4214 
4215   for (proc=0, k=0; proc<size; proc++){
4216     if (!len_s[proc]) continue;
4217     i = owners[proc];
4218     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4219     k++;
4220   }
4221 
4222   /* receives and sends of j-structure are complete */
4223   /*------------------------------------------------*/
4224   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4225   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4226 
4227   /* send and recv i-structure */
4228   /*---------------------------*/
4229   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4230   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4231 
4232   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4233   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4234   for (proc=0,k=0; proc<size; proc++){
4235     if (!len_s[proc]) continue;
4236     /* form outgoing message for i-structure:
4237          buf_si[0]:                 nrows to be sent
4238                [1:nrows]:           row index (local to the receiving process)
4239                [nrows+1:2*nrows+1]: i-structure index
4240     */
4241     /*-------------------------------------------*/
4242     nrows = len_si[proc]/2 - 1;
4243     buf_si_i    = buf_si + nrows+1;
4244     buf_si[0]   = nrows;
4245     buf_si_i[0] = 0;
4246     nrows = 0;
4247     for (i=owners[proc]; i<owners[proc+1]; i++){
4248       anzi = ai[i+1] - ai[i];
4249       if (anzi) {
4250         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4251         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4252         nrows++;
4253       }
4254     }
4255     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4256     k++;
4257     buf_si += len_si[proc];
4258   }
4259 
4260   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4261   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4262 
4263   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4264   for (i=0; i<merge->nrecv; i++){
4265     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4266   }
4267 
4268   ierr = PetscFree(len_si);CHKERRQ(ierr);
4269   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4270   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4271   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4272   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4273   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4274   ierr = PetscFree(status);CHKERRQ(ierr);
4275 
4276   /* compute a local seq matrix in each processor */
4277   /*----------------------------------------------*/
4278   /* allocate bi array and free space for accumulating nonzero column info */
4279   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4280   bi[0] = 0;
4281 
4282   /* create and initialize a linked list */
4283   nlnk = N+1;
4284   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4285 
4286   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4288   len  = ai[owners[rank+1]] - ai[owners[rank]];
4289   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4290   current_space = free_space;
4291 
4292   /* determine symbolic info for each local row */
4293   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4294   nextrow = buf_ri_k + merge->nrecv;
4295   nextai  = nextrow + merge->nrecv;
4296   for (k=0; k<merge->nrecv; k++){
4297     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4298     nrows = *buf_ri_k[k];
4299     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4300     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure (offset) part of the k-th received message */
4301   }
4302 
4303   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4304   len = 0;
4305   for (i=0;i<m;i++) {
4306     bnzi   = 0;
4307     /* add local non-zero cols of this proc's seqmat into lnk */
4308     arow   = owners[rank] + i;
4309     anzi   = ai[arow+1] - ai[arow];
4310     aj     = a->j + ai[arow];
4311     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4312     bnzi += nlnk;
4313     /* add received col data into lnk */
4314     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4315       if (i == *nextrow[k]) { /* i-th row */
4316         anzi = *(nextai[k]+1) - *nextai[k];
4317         aj   = buf_rj[k] + *nextai[k];
4318         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4319         bnzi += nlnk;
4320         nextrow[k]++; nextai[k]++;
4321       }
4322     }
4323     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4324 
4325     /* if free space is not available, make more free space */
4326     if (current_space->local_remaining<bnzi) {
4327       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4328       nspacedouble++;
4329     }
4330     /* copy data into free space, then initialize lnk */
4331     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4332     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4333 
4334     current_space->array           += bnzi;
4335     current_space->local_used      += bnzi;
4336     current_space->local_remaining -= bnzi;
4337 
4338     bi[i+1] = bi[i] + bnzi;
4339   }
4340 
4341   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4342 
4343   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4344   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4345   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4346 
4347   /* create symbolic parallel matrix B_mpi */
4348   /*---------------------------------------*/
4349   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4350   if (n==PETSC_DECIDE) {
4351     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4352   } else {
4353     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4354   }
4355   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4356   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4357   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4358 
4359   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4360   B_mpi->assembled     = PETSC_FALSE;
4361   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4362   merge->bi            = bi;
4363   merge->bj            = bj;
4364   merge->buf_ri        = buf_ri;
4365   merge->buf_rj        = buf_rj;
4366   merge->coi           = PETSC_NULL;
4367   merge->coj           = PETSC_NULL;
4368   merge->owners_co     = PETSC_NULL;
4369 
4370   /* attach the supporting struct to B_mpi for reuse */
4371   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4372   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4373   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4374   *mpimat = B_mpi;
4375 
4376   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4377   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4378   PetscFunctionReturn(0);
4379 }
4380 
4381 #undef __FUNCT__
4382 #define __FUNCT__ "MatMerge_SeqsToMPI"
4383 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4384 {
4385   PetscErrorCode   ierr;
4386 
4387   PetscFunctionBegin;
4388   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4389   if (scall == MAT_INITIAL_MATRIX){
4390     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4391   }
4392   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4393   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4394   PetscFunctionReturn(0);
4395 }
4396 
4397 #undef __FUNCT__
4398 #define __FUNCT__ "MatGetLocalMat"
4399 /*@
4400      MatGetLocalMat - Creates a SeqAIJ matrix from the local rows of a parallel AIJ matrix
4401 
4402     Not Collective
4403 
4404    Input Parameters:
4405 +    A - the matrix
4406 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4407 
4408    Output Parameter:
4409 .    A_loc - the local sequential matrix generated
4410 
4411     Level: developer
4412 
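   Example usage, as a sketch (error checking omitted; the MAT_REUSE_MATRIX call
   only refreshes the numerical values of A_loc after the values of A have
   changed, keeping the same nonzero pattern):
.vb
      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
      ierr = MatDestroy(A_loc);
.ve
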
4413 @*/
4414 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4415 {
4416   PetscErrorCode  ierr;
4417   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4418   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4419   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4420   MatScalar       *aa=a->a,*ba=b->a,*cam;
4421   PetscScalar     *ca;
4422   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4423   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4424 
4425   PetscFunctionBegin;
4426   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4427   if (scall == MAT_INITIAL_MATRIX){
4428     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4429     ci[0] = 0;
4430     for (i=0; i<am; i++){
4431       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4432     }
4433     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4434     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4435     k = 0;
4436     for (i=0; i<am; i++) {
4437       ncols_o = bi[i+1] - bi[i];
4438       ncols_d = ai[i+1] - ai[i];
4439       /* off-diagonal portion of A */
4440       for (jo=0; jo<ncols_o; jo++) {
4441         col = cmap[*bj];
4442         if (col >= cstart) break;
4443         cj[k]   = col; bj++;
4444         ca[k++] = *ba++;
4445       }
4446       /* diagonal portion of A */
4447       for (j=0; j<ncols_d; j++) {
4448         cj[k]   = cstart + *aj++;
4449         ca[k++] = *aa++;
4450       }
4451       /* off-diagonal portion of A */
4452       for (j=jo; j<ncols_o; j++) {
4453         cj[k]   = cmap[*bj++];
4454         ca[k++] = *ba++;
4455       }
4456     }
4457     /* put together the new matrix */
4458     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4459     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4460     /* Since these are PETSc arrays, change flags to free them as necessary. */
4461     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4462     mat->free_a  = PETSC_TRUE;
4463     mat->free_ij = PETSC_TRUE;
4464     mat->nonew   = 0;
4465   } else if (scall == MAT_REUSE_MATRIX){
4466     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4467     ci = mat->i; cj = mat->j; cam = mat->a;
4468     for (i=0; i<am; i++) {
4469       /* off-diagonal portion of A */
4470       ncols_o = bi[i+1] - bi[i];
4471       for (jo=0; jo<ncols_o; jo++) {
4472         col = cmap[*bj];
4473         if (col >= cstart) break;
4474         *cam++ = *ba++; bj++;
4475       }
4476       /* diagonal portion of A */
4477       ncols_d = ai[i+1] - ai[i];
4478       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4479       /* off-diagonal portion of A */
4480       for (j=jo; j<ncols_o; j++) {
4481         *cam++ = *ba++; bj++;
4482       }
4483     }
4484   } else {
4485     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4486   }
4487 
4488   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4489   PetscFunctionReturn(0);
4490 }
4491 
4492 #undef __FUNCT__
4493 #define __FUNCT__ "MatGetLocalMatCondensed"
4494 /*@C
4495      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and the NON-ZERO columns of a parallel AIJ matrix
4496 
4497     Not Collective
4498 
4499    Input Parameters:
4500 +    A - the matrix
4501 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4502 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4503 
4504    Output Parameter:
4505 .    A_loc - the local sequential matrix generated
4506 
4507     Level: developer
4508 
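   Example usage, as a sketch (error checking omitted; passing PETSC_NULL for
   row and col lets the routine build the index sets itself):
.vb
      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
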
4509 @*/
4510 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4511 {
4512   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4513   PetscErrorCode    ierr;
4514   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4515   IS                isrowa,iscola;
4516   Mat               *aloc;
4517 
4518   PetscFunctionBegin;
4519   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4520   if (!row){
4521     start = A->rmap->rstart; end = A->rmap->rend;
4522     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4523   } else {
4524     isrowa = *row;
4525   }
4526   if (!col){
4527     start = A->cmap->rstart;
4528     cmap  = a->garray;
4529     nzA   = a->A->cmap->n;
4530     nzB   = a->B->cmap->n;
4531     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4532     ncols = 0;
4533     for (i=0; i<nzB; i++) {
4534       if (cmap[i] < start) idx[ncols++] = cmap[i];
4535       else break;
4536     }
4537     imark = i;
4538     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4539     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4540     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4541     ierr = PetscFree(idx);CHKERRQ(ierr);
4542   } else {
4543     iscola = *col;
4544   }
4545   if (scall != MAT_INITIAL_MATRIX){
4546     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4547     aloc[0] = *A_loc;
4548   }
4549   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4550   *A_loc = aloc[0];
4551   ierr = PetscFree(aloc);CHKERRQ(ierr);
4552   if (!row){
4553     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4554   }
4555   if (!col){
4556     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4557   }
4558   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4559   PetscFunctionReturn(0);
4560 }
4561 
4562 #undef __FUNCT__
4563 #define __FUNCT__ "MatGetBrowsOfAcols"
4564 /*@C
4565     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of local A
4566 
4567     Collective on Mat
4568 
4569    Input Parameters:
4570 +    A,B - the matrices in mpiaij format
4571 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4572 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4573 
4574    Output Parameter:
4575 +    rowb, colb - index sets of rows and columns of B to extract
4576 .    brstart - index of the first of the B->rmap->n rows of B_seq that are taken from B's local rows
4577 -    B_seq - the sequential matrix generated
4578 
4579     Level: developer
4580 
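   Example usage, as a sketch (error checking omitted; rowb, colb and B_seq from
   the first call are passed back in for the MAT_REUSE_MATRIX call):
.vb
      IS       rowb,colb;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);
      ... modify the values of B, keeping its nonzero pattern ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);
.ve
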
4581 @*/
4582 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4583 {
4584   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4585   PetscErrorCode    ierr;
4586   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4587   IS                isrowb,iscolb;
4588   Mat               *bseq;
4589 
4590   PetscFunctionBegin;
4591   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4592     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4593   }
4594   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4595 
4596   if (scall == MAT_INITIAL_MATRIX){
4597     start = A->cmap->rstart;
4598     cmap  = a->garray;
4599     nzA   = a->A->cmap->n;
4600     nzB   = a->B->cmap->n;
4601     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4602     ncols = 0;
4603     for (i=0; i<nzB; i++) {  /* global rows of B below the local row range */
4604       if (cmap[i] < start) idx[ncols++] = cmap[i];
4605       else break;
4606     }
4607     imark = i;
4608     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4609     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows of B above the local row range */
4610     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4611     ierr = PetscFree(idx);CHKERRQ(ierr);
4612     *brstart = imark;
4613     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4614   } else {
4615     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4616     isrowb = *rowb; iscolb = *colb;
4617     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4618     bseq[0] = *B_seq;
4619   }
4620   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4621   *B_seq = bseq[0];
4622   ierr = PetscFree(bseq);CHKERRQ(ierr);
4623   if (!rowb){
4624     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4625   } else {
4626     *rowb = isrowb;
4627   }
4628   if (!colb){
4629     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4630   } else {
4631     *colb = iscolb;
4632   }
4633   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4634   PetscFunctionReturn(0);
4635 }
4636 
4637 #undef __FUNCT__
4638 #define __FUNCT__ "MatGetBrowsOfAoCols"
4639 /*@C
4640     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4641     of the OFF-DIAGONAL portion of local A
4642 
4643     Collective on Mat
4644 
4645    Input Parameters:
4646 +    A,B - the matrices in mpiaij format
4647 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4648 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4649 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4650 
4651    Output Parameter:
4652 .    B_oth - the sequential matrix generated
4653 
4654     Level: developer
4655 
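   Example usage, as a sketch (error checking omitted; startsj and bufa returned
   by the first call hold the communication layout and the send buffer that the
   MAT_REUSE_MATRIX call reuses):
.vb
      PetscInt  *startsj = PETSC_NULL;
      MatScalar *bufa    = PETSC_NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);
      ... modify the values of B, keeping its nonzero pattern ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);
.ve
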
4656 @*/
4657 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4658 {
4659   VecScatter_MPI_General *gen_to,*gen_from;
4660   PetscErrorCode         ierr;
4661   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4662   Mat_SeqAIJ             *b_oth;
4663   VecScatter             ctx=a->Mvctx;
4664   MPI_Comm               comm=((PetscObject)ctx)->comm;
4665   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4666   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4667   PetscScalar            *rvalues,*svalues;
4668   MatScalar              *b_otha,*bufa,*bufA;
4669   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4670   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4671   MPI_Status             *sstatus,rstatus;
4672   PetscMPIInt            jj;
4673   PetscInt               *cols,sbs,rbs;
4674   PetscScalar            *vals;
4675 
4676   PetscFunctionBegin;
4677   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4678     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4679   }
4680   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4681   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4682 
4683   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4684   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4685   rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
4686   svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
4687   nrecvs   = gen_from->n;
4688   nsends   = gen_to->n;
4689 
4690   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4691   srow     = gen_to->indices;   /* local row index to be sent */
4692   sstarts  = gen_to->starts;
4693   sprocs   = gen_to->procs;
4694   sstatus  = gen_to->sstatus;
4695   sbs      = gen_to->bs;
4696   rstarts  = gen_from->starts;
4697   rprocs   = gen_from->procs;
4698   rbs      = gen_from->bs;
4699 
4700   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4701   if (scall == MAT_INITIAL_MATRIX){
4702     /* i-array */
4703     /*---------*/
4704     /*  post receives */
4705     for (i=0; i<nrecvs; i++){
4706       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4707       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4708       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4709     }
4710 
4711     /* pack the outgoing message */
4712     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4713     rstartsj = sstartsj + nsends +1;
4714     sstartsj[0] = 0;  rstartsj[0] = 0;
4715     len = 0; /* total length of j or a array to be sent */
4716     k = 0;
4717     for (i=0; i<nsends; i++){
4718       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4719       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4720       for (j=0; j<nrows; j++) {
4721         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4722         for (l=0; l<sbs; l++){
4723           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4724           rowlen[j*sbs+l] = ncols;
4725           len += ncols;
4726           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4727         }
4728         k++;
4729       }
4730       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4731       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4732     }
4733     /* recvs and sends of i-array are completed */
4734     i = nrecvs;
4735     while (i--) {
4736       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4737     }
4738     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4739 
4740     /* allocate buffers for sending j and a arrays */
4741     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4742     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4743 
4744     /* create i-array of B_oth */
4745     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4746     b_othi[0] = 0;
4747     len = 0; /* total length of j or a array to be received */
4748     k = 0;
4749     for (i=0; i<nrecvs; i++){
4750       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4751       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4752       for (j=0; j<nrows; j++) {
4753         b_othi[k+1] = b_othi[k] + rowlen[j];
4754         len += rowlen[j]; k++;
4755       }
4756       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4757     }
4758 
4759     /* allocate space for the j and a arrays of B_oth */
4760     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4761     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4762 
4763     /* j-array */
4764     /*---------*/
4765     /*  post receives of j-array */
4766     for (i=0; i<nrecvs; i++){
4767       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4768       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4769     }
4770 
4771     /* pack the outgoing message j-array */
4772     k = 0;
4773     for (i=0; i<nsends; i++){
4774       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4775       bufJ = bufj+sstartsj[i];
4776       for (j=0; j<nrows; j++) {
4777         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4778         for (ll=0; ll<sbs; ll++){
4779           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4780           for (l=0; l<ncols; l++){
4781             *bufJ++ = cols[l];
4782           }
4783           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4784         }
4785       }
4786       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4787     }
4788 
4789     /* recvs and sends of j-array are completed */
4790     i = nrecvs;
4791     while (i--) {
4792       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4793     }
4794     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4795   } else if (scall == MAT_REUSE_MATRIX){
4796     sstartsj = *startsj;
4797     rstartsj = sstartsj + nsends +1;
4798     bufa     = *bufa_ptr;
4799     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4800     b_otha   = b_oth->a;
4801   } else {
4802     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4803   }
4804 
4805   /* a-array */
4806   /*---------*/
4807   /*  post receives of a-array */
4808   for (i=0; i<nrecvs; i++){
4809     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4810     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4811   }
4812 
4813   /* pack the outgoing message a-array */
4814   k = 0;
4815   for (i=0; i<nsends; i++){
4816     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4817     bufA = bufa+sstartsj[i];
4818     for (j=0; j<nrows; j++) {
4819       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4820       for (ll=0; ll<sbs; ll++){
4821         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4822         for (l=0; l<ncols; l++){
4823           *bufA++ = vals[l];
4824         }
4825         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4826       }
4827     }
4828     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4829   }
4830   /* recvs and sends of a-array are completed */
4831   i = nrecvs;
4832   while (i--) {
4833     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4834   }
4835   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4836   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4837 
4838   if (scall == MAT_INITIAL_MATRIX){
4839     /* put together the new matrix */
4840     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4841 
4842     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4843     /* Since these are PETSc arrays, change flags to free them as necessary. */
4844     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4845     b_oth->free_a  = PETSC_TRUE;
4846     b_oth->free_ij = PETSC_TRUE;
4847     b_oth->nonew   = 0;
4848 
4849     ierr = PetscFree(bufj);CHKERRQ(ierr);
4850     if (!startsj || !bufa_ptr){
4851       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4852       ierr = PetscFree(bufa);CHKERRQ(ierr);
4853     } else {
4854       *startsj  = sstartsj;
4855       *bufa_ptr = bufa;
4856     }
4857   }
4858   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4859   PetscFunctionReturn(0);
4860 }
4861 
4862 #undef __FUNCT__
4863 #define __FUNCT__ "MatGetCommunicationStructs"
4864 /*@C
4865   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4866 
4867   Not Collective
4868 
4869   Input Parameters:
4870 . A - The matrix in mpiaij format
4871 
4872   Output Parameter:
4873 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4874 . colmap - A map from global column index to local index into lvec
4875 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4876 
4877   Level: developer
4878 
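  Example usage, as a sketch (error checking omitted; the returned objects are
  owned by the matrix and must not be destroyed by the caller; colmap has type
  PetscTable instead of PetscInt* when PETSc is configured with PETSC_USE_CTABLE):
.vb
     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve
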
4879 @*/
4880 #if defined (PETSC_USE_CTABLE)
4881 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4882 #else
4883 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4884 #endif
4885 {
4886   Mat_MPIAIJ *a;
4887 
4888   PetscFunctionBegin;
4889   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4890   PetscValidPointer(lvec, 2);
4891   PetscValidPointer(colmap, 3);
4892   PetscValidPointer(multScatter, 4);
4893   a = (Mat_MPIAIJ *) A->data;
4894   if (lvec) *lvec = a->lvec;
4895   if (colmap) *colmap = a->colmap;
4896   if (multScatter) *multScatter = a->Mvctx;
4897   PetscFunctionReturn(0);
4898 }
4899 
4900 EXTERN_C_BEGIN
4901 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4902 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4903 EXTERN_C_END
4904 
4905 #include "../src/mat/impls/dense/mpi/mpidense.h"
4906 
4907 #undef __FUNCT__
4908 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4909 /*
4910     Computes C = A*B as (B'*A')', using (A*B)' = B'*A', since forming the MPIDense*MPIAIJ product A*B directly is untenable
4911 
4912                n                       p                          p
4913         (              )       (              )         (                  )
4914       m (      A       )  *  n (       B      )   =   m (         C        )
4915         (              )       (              )         (                  )
4916 
4917 */
4918 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4919 {
4920   PetscErrorCode     ierr;
4921   Mat                At,Bt,Ct;
4922 
4923   PetscFunctionBegin;
4924   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4925   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4926   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4927   ierr = MatDestroy(At);CHKERRQ(ierr);
4928   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4929   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4930   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4931   PetscFunctionReturn(0);
4932 }
4933 
4934 #undef __FUNCT__
4935 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4936 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4937 {
4938   PetscErrorCode ierr;
4939   PetscInt       m=A->rmap->n,n=B->cmap->n;
4940   Mat            Cmat;
4941 
4942   PetscFunctionBegin;
4943   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4944   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4945   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4946   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4947   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4948   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4949   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4950   *C   = Cmat;
4951   PetscFunctionReturn(0);
4952 }
4953 
4954 /* ----------------------------------------------------------------*/
4955 #undef __FUNCT__
4956 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4957 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4958 {
4959   PetscErrorCode ierr;
4960 
4961   PetscFunctionBegin;
4962   if (scall == MAT_INITIAL_MATRIX){
4963     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4964   }
4965   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4966   PetscFunctionReturn(0);
4967 }
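
/*
   A minimal usage sketch for the dense*AIJ product above (error checking
   omitted): with A of type MATMPIDENSE and B of type MATMPIAIJ,

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);

   is intended to reach MatMatMult_MPIDense_MPIAIJ() through the
   "MatMatMult_mpidense_mpiaij_C" function composed on the MPIAIJ matrix in
   MatCreate_MPIAIJ() below.
*/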
4968 
4969 EXTERN_C_BEGIN
4970 #if defined(PETSC_HAVE_MUMPS)
4971 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4972 #endif
4973 #if defined(PETSC_HAVE_PASTIX)
4974 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4975 #endif
4976 #if defined(PETSC_HAVE_SUPERLU_DIST)
4977 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4978 #endif
4979 #if defined(PETSC_HAVE_SPOOLES)
4980 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4981 #endif
4982 EXTERN_C_END
4983 
4984 /*MC
4985    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4986 
4987    Options Database Keys:
4988 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4989 
4990   Level: beginner
4991 
4992 .seealso: MatCreateMPIAIJ()
4993 M*/
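
/*
   A minimal sketch of creating a MATMPIAIJ matrix explicitly (error checking
   omitted; comm, M and N stand for the communicator and global sizes, and the
   preallocation counts 5 and 2 are illustrative only):

      Mat A;
      ierr = MatCreate(comm,&A);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      ierr = MatSetType(A,MATMPIAIJ);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/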
4994 
4995 EXTERN_C_BEGIN
4996 #undef __FUNCT__
4997 #define __FUNCT__ "MatCreate_MPIAIJ"
4998 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4999 {
5000   Mat_MPIAIJ     *b;
5001   PetscErrorCode ierr;
5002   PetscMPIInt    size;
5003 
5004   PetscFunctionBegin;
5005   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5006 
5007   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5008   B->data         = (void*)b;
5009   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5010   B->rmap->bs      = 1;
5011   B->assembled    = PETSC_FALSE;
5012   B->mapping      = 0;
5013 
5014   B->insertmode      = NOT_SET_VALUES;
5015   b->size            = size;
5016   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5017 
5018   /* build cache for off array entries formed */
5019   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5020   b->donotstash  = PETSC_FALSE;
5021   b->colmap      = 0;
5022   b->garray      = 0;
5023   b->roworiented = PETSC_TRUE;
5024 
5025   /* stuff used for matrix vector multiply */
5026   b->lvec      = PETSC_NULL;
5027   b->Mvctx     = PETSC_NULL;
5028 
5029   /* stuff for MatGetRow() */
5030   b->rowindices   = 0;
5031   b->rowvalues    = 0;
5032   b->getrowactive = PETSC_FALSE;
5033 
5034 #if defined(PETSC_HAVE_SPOOLES)
5035   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
5036                                      "MatGetFactor_mpiaij_spooles",
5037                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5038 #endif
5039 #if defined(PETSC_HAVE_MUMPS)
5040   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
5041                                      "MatGetFactor_mpiaij_mumps",
5042                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5043 #endif
5044 #if defined(PETSC_HAVE_PASTIX)
5045   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
5046 					   "MatGetFactor_mpiaij_pastix",
5047 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5048 #endif
5049 #if defined(PETSC_HAVE_SUPERLU_DIST)
5050   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
5051                                      "MatGetFactor_mpiaij_superlu_dist",
5052                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5053 #endif
5054   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5055                                      "MatStoreValues_MPIAIJ",
5056                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5057   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5058                                      "MatRetrieveValues_MPIAIJ",
5059                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5060   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5061 				     "MatGetDiagonalBlock_MPIAIJ",
5062                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5063   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5064 				     "MatIsTranspose_MPIAIJ",
5065 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5066   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5067 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5068 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5069   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5070 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5071 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5072   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5073 				     "MatDiagonalScaleLocal_MPIAIJ",
5074 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5075   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5076                                      "MatConvert_MPIAIJ_MPICSRPERM",
5077                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5078   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5079                                      "MatConvert_MPIAIJ_MPICRL",
5080                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5081   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5082                                      "MatMatMult_MPIDense_MPIAIJ",
5083                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5084   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5085                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5086                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5087   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5088                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5089                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5090   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5091   PetscFunctionReturn(0);
5092 }
5093 EXTERN_C_END
5094 
5095 #undef __FUNCT__
5096 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5097 /*@
5098      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5099          and "off-diagonal" part of the matrix in CSR format.
5100 
5101    Collective on MPI_Comm
5102 
5103    Input Parameters:
5104 +  comm - MPI communicator
5105 .  m - number of local rows (Cannot be PETSC_DECIDE)
5106 .  n - This value should be the same as the local size used in creating the
5107        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5108        calculated if N is given). For square matrices n is almost always m.
5109 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5110 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5111 .   i - row indices for "diagonal" portion of matrix
5112 .   j - column indices
5113 .   a - matrix values
5114 .   oi - row indices for "off-diagonal" portion of matrix
5115 .   oj - column indices
5116 -   oa - matrix values
5117 
5118    Output Parameter:
5119 .   mat - the matrix
5120 
5121    Level: advanced
5122 
5123    Notes:
5124        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5125 
5126        The i and j indices are 0 based
5127 
5128        See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix
5129 
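       Example, as a sketch for two MPI processes assembling the 2x2 matrix
       [2 -1; -1 2] with one row per process (error checking omitted; rank comes
       from MPI_Comm_rank(); the j indices of the diagonal block are local to
       that block while the oj indices of the off-diagonal block are global):
.vb
          PetscInt    i[2]  = {0,1},  oi[2] = {0,1};
          PetscInt    j[1],oj[1];
          PetscScalar a[1]  = {2.0},  oa[1] = {-1.0};
          Mat         A;

          j[0]  = 0;
          oj[0] = rank ? 0 : 1;
          ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
.ve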
5130 
5131 .keywords: matrix, aij, compressed row, sparse, parallel
5132 
5133 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5134           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5135 @*/
5136 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5137 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5138 {
5139   PetscErrorCode ierr;
5140   Mat_MPIAIJ     *maij;
5141 
5142   PetscFunctionBegin;
5143   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5144   if (i[0]) {
5145     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5146   }
5147   if (oi[0]) {
5148     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5149   }
5150   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5151   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5152   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5153   maij = (Mat_MPIAIJ*) (*mat)->data;
5154   maij->donotstash     = PETSC_TRUE;
5155   (*mat)->preallocated = PETSC_TRUE;
5156 
5157   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5158   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5159   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5160   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5161 
5162   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5163   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5164 
5165   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5166   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5167   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5168   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5169 
5170   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5171   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5172   PetscFunctionReturn(0);
5173 }
5174 
5175 /*
5176     Special version for direct calls from Fortran
5177 */
5178 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5179 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5180 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5181 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5182 #endif
5183 
5184 /* Change these macros so they can be used in a void function */
5185 #undef CHKERRQ
5186 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5187 #undef SETERRQ2
5188 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5189 #undef SETERRQ
5190 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5191 
5192 EXTERN_C_BEGIN
5193 #undef __FUNCT__
5194 #define __FUNCT__ "matsetvaluesmpiaij_"
5195 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5196 {
5197   Mat             mat = *mmat;
5198   PetscInt        m = *mm, n = *mn;
5199   InsertMode      addv = *maddv;
5200   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5201   PetscScalar     value;
5202   PetscErrorCode  ierr;
5203 
5204   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5205   if (mat->insertmode == NOT_SET_VALUES) {
5206     mat->insertmode = addv;
5207   }
5208 #if defined(PETSC_USE_DEBUG)
5209   else if (mat->insertmode != addv) {
5210     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5211   }
5212 #endif
5213   {
5214   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5215   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5216   PetscTruth      roworiented = aij->roworiented;
5217 
5218   /* Some Variables required in the macro */
5219   Mat             A = aij->A;
5220   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5221   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5222   MatScalar       *aa = a->a;
5223   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5224   Mat             B = aij->B;
5225   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5226   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5227   MatScalar       *ba = b->a;
5228 
5229   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5230   PetscInt        nonew = a->nonew;
5231   MatScalar       *ap1,*ap2;
5232 
5233   PetscFunctionBegin;
5234   for (i=0; i<m; i++) {
5235     if (im[i] < 0) continue;
5236 #if defined(PETSC_USE_DEBUG)
5237     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5238 #endif
5239     if (im[i] >= rstart && im[i] < rend) {
5240       row      = im[i] - rstart;
5241       lastcol1 = -1;
5242       rp1      = aj + ai[row];
5243       ap1      = aa + ai[row];
5244       rmax1    = aimax[row];
5245       nrow1    = ailen[row];
5246       low1     = 0;
5247       high1    = nrow1;
5248       lastcol2 = -1;
5249       rp2      = bj + bi[row];
5250       ap2      = ba + bi[row];
5251       rmax2    = bimax[row];
5252       nrow2    = bilen[row];
5253       low2     = 0;
5254       high2    = nrow2;
5255 
5256       for (j=0; j<n; j++) {
5257         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5258         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5259         if (in[j] >= cstart && in[j] < cend){
5260           col = in[j] - cstart;
5261           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5262         } else if (in[j] < 0) continue;
5263 #if defined(PETSC_USE_DEBUG)
5264         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5265 #endif
5266         else {
5267           if (mat->was_assembled) {
5268             if (!aij->colmap) {
5269               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5270             }
5271 #if defined (PETSC_USE_CTABLE)
5272             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5273 	    col--;
5274 #else
5275             col = aij->colmap[in[j]] - 1;
5276 #endif
5277             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5278               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5279               col =  in[j];
5280               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5281               B = aij->B;
5282               b = (Mat_SeqAIJ*)B->data;
5283               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5284               rp2      = bj + bi[row];
5285               ap2      = ba + bi[row];
5286               rmax2    = bimax[row];
5287               nrow2    = bilen[row];
5288               low2     = 0;
5289               high2    = nrow2;
5290               bm       = aij->B->rmap->n;
5291               ba = b->a;
5292             }
5293           } else col = in[j];
5294           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5295         }
5296       }
5297     } else {
5298       if (!aij->donotstash) {
5299         if (roworiented) {
5300           if (ignorezeroentries && v[i*n] == 0.0) continue;
5301           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5302         } else {
5303           if (ignorezeroentries && v[i] == 0.0) continue;
5304           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5305         }
5306       }
5307     }
5308   }}
5309   PetscFunctionReturnVoid();
5310 }
5311 EXTERN_C_END
5312 
5313