xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6bf464f92cc51e6fd6163850774a6badb2f63b6b)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
7 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
8 {
9   PetscErrorCode  ierr;
10   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
11   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
12   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
13   const PetscInt  *ia,*ib;
14   const MatScalar *aa,*bb;
15   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
16   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
17 
18   PetscFunctionBegin;
19   *keptrows = 0;
20   ia = a->i;
21   ib = b->i;
22   for (i=0; i<m; i++) {
23     na = ia[i+1] - ia[i];
24     nb = ib[i+1] - ib[i];
25     if (!na && !nb) {
26       cnt++;
27       goto ok1;
28     }
29     aa = a->a + ia[i];
30     for (j=0; j<na; j++) {
31       if (aa[j] != 0.0) goto ok1;
32     }
33     bb = b->a + ib[i];
34     for (j=0; j <nb; j++) {
35       if (bb[j] != 0.0) goto ok1;
36     }
37     cnt++;
38     ok1:;
39   }
40   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
41   if (!n0rows) PetscFunctionReturn(0);
42   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
43   cnt = 0;
44   for (i=0; i<m; i++) {
45     na = ia[i+1] - ia[i];
46     nb = ib[i+1] - ib[i];
47     if (!na && !nb) continue;
48     aa = a->a + ia[i];
49     for(j=0; j<na;j++) {
50       if (aa[j] != 0.0) {
51         rows[cnt++] = rstart + i;
52         goto ok2;
53       }
54     }
55     bb = b->a + ib[i];
56     for (j=0; j<nb; j++) {
57       if (bb[j] != 0.0) {
58         rows[cnt++] = rstart + i;
59         goto ok2;
60       }
61     }
62     ok2:;
63   }
64   ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 
68 #undef __FUNCT__
69 #define __FUNCT__ "MatDistribute_MPIAIJ"
70 /*
71     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
72     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
73 
74     Only for square matrices
75 */
76 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
77 {
78   PetscMPIInt    rank,size;
79   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
80   PetscErrorCode ierr;
81   Mat            mat;
82   Mat_SeqAIJ     *gmata;
83   PetscMPIInt    tag;
84   MPI_Status     status;
85   PetscBool      aij;
86   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
87 
88   PetscFunctionBegin;
89   CHKMEMQ;
90   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
91   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
92   if (!rank) {
93     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
94     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
95   }
96   if (reuse == MAT_INITIAL_MATRIX) {
97     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
98     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
99     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
100     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
101     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
102     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
103     rowners[0] = 0;
104     for (i=2; i<=size; i++) {
105       rowners[i] += rowners[i-1];
106     }
107     rstart = rowners[rank];
108     rend   = rowners[rank+1];
109     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
110     if (!rank) {
111       gmata = (Mat_SeqAIJ*) gmat->data;
112       /* send row lengths to all processors */
113       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
114       for (i=1; i<size; i++) {
115 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
116       }
117       /* determine number diagonal and off-diagonal counts */
118       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
119       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
120       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
121       jj = 0;
122       for (i=0; i<m; i++) {
123 	for (j=0; j<dlens[i]; j++) {
124           if (gmata->j[jj] < rstart) ld[i]++;
125 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
126 	  jj++;
127 	}
128       }
129       /* send column indices to other processes */
130       for (i=1; i<size; i++) {
131 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
132 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
133 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
134       }
135 
136       /* send numerical values to other processes */
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       gmataa = gmata->a;
142       gmataj = gmata->j;
143 
144     } else {
145       /* receive row lengths */
146       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
147       /* receive column indices */
148       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
149       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
150       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
151       /* determine number diagonal and off-diagonal counts */
152       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
153       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
154       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
155       jj = 0;
156       for (i=0; i<m; i++) {
157 	for (j=0; j<dlens[i]; j++) {
158           if (gmataj[jj] < rstart) ld[i]++;
159 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
160 	  jj++;
161 	}
162       }
163       /* receive numerical values */
164       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
165       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
166     }
167     /* set preallocation */
168     for (i=0; i<m; i++) {
169       dlens[i] -= olens[i];
170     }
171     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
172     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
173 
174     for (i=0; i<m; i++) {
175       dlens[i] += olens[i];
176     }
177     cnt  = 0;
178     for (i=0; i<m; i++) {
179       row  = rstart + i;
180       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
181       cnt += dlens[i];
182     }
183     if (rank) {
184       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
185     }
186     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
187     ierr = PetscFree(rowners);CHKERRQ(ierr);
188     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
189     *inmat = mat;
190   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
191     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
192     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
193     mat   = *inmat;
194     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
195     if (!rank) {
196       /* send numerical values to other processes */
197       gmata = (Mat_SeqAIJ*) gmat->data;
198       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
199       gmataa = gmata->a;
200       for (i=1; i<size; i++) {
201         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
202         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
203       }
204       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
205     } else {
206       /* receive numerical values from process 0*/
207       nz   = Ad->nz + Ao->nz;
208       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
209       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
210     }
211     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
212     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
213     ad = Ad->a;
214     ao = Ao->a;
215     if (mat->rmap->n) {
216       i  = 0;
217       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
218       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
219     }
220     for (i=1; i<mat->rmap->n; i++) {
221       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
222       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
223     }
224     i--;
225     if (mat->rmap->n) {
226       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
227     }
228     if (rank) {
229       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
230     }
231   }
232   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234   CHKMEMQ;
235   PetscFunctionReturn(0);
236 }
237 
238 /*
239   Local utility routine that creates a mapping from the global column
240 number to the local number in the off-diagonal part of the local
241 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
242 a slightly higher hash table cost; without it it is not scalable (each processor
243 has an order N integer array but is fast to acess.
244 */
245 #undef __FUNCT__
246 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
247 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
248 {
249   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
250   PetscErrorCode ierr;
251   PetscInt       n = aij->B->cmap->n,i;
252 
253   PetscFunctionBegin;
254 #if defined (PETSC_USE_CTABLE)
255   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
256   for (i=0; i<n; i++){
257     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
258   }
259 #else
260   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
261   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
262   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
263   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
264 #endif
265   PetscFunctionReturn(0);
266 }
267 
268 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
269 { \
270     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
271     lastcol1 = col;\
272     while (high1-low1 > 5) { \
273       t = (low1+high1)/2; \
274       if (rp1[t] > col) high1 = t; \
275       else             low1  = t; \
276     } \
277       for (_i=low1; _i<high1; _i++) { \
278         if (rp1[_i] > col) break; \
279         if (rp1[_i] == col) { \
280           if (addv == ADD_VALUES) ap1[_i] += value;   \
281           else                    ap1[_i] = value; \
282           goto a_noinsert; \
283         } \
284       }  \
285       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
286       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
287       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
288       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
289       N = nrow1++ - 1; a->nz++; high1++; \
290       /* shift up all the later entries in this row */ \
291       for (ii=N; ii>=_i; ii--) { \
292         rp1[ii+1] = rp1[ii]; \
293         ap1[ii+1] = ap1[ii]; \
294       } \
295       rp1[_i] = col;  \
296       ap1[_i] = value;  \
297       a_noinsert: ; \
298       ailen[row] = nrow1; \
299 }
300 
301 
302 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
303 { \
304     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
305     lastcol2 = col;\
306     while (high2-low2 > 5) { \
307       t = (low2+high2)/2; \
308       if (rp2[t] > col) high2 = t; \
309       else             low2  = t; \
310     } \
311     for (_i=low2; _i<high2; _i++) {		\
312       if (rp2[_i] > col) break;			\
313       if (rp2[_i] == col) {			      \
314 	if (addv == ADD_VALUES) ap2[_i] += value;     \
315 	else                    ap2[_i] = value;      \
316 	goto b_noinsert;			      \
317       }						      \
318     }							      \
319     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
320     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
321     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
322     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
323     N = nrow2++ - 1; b->nz++; high2++;					\
324     /* shift up all the later entries in this row */			\
325     for (ii=N; ii>=_i; ii--) {						\
326       rp2[ii+1] = rp2[ii];						\
327       ap2[ii+1] = ap2[ii];						\
328     }									\
329     rp2[_i] = col;							\
330     ap2[_i] = value;							\
331     b_noinsert: ;								\
332     bilen[row] = nrow2;							\
333 }
334 
335 #undef __FUNCT__
336 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
337 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
338 {
339   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
340   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
341   PetscErrorCode ierr;
342   PetscInt       l,*garray = mat->garray,diag;
343 
344   PetscFunctionBegin;
345   /* code only works for square matrices A */
346 
347   /* find size of row to the left of the diagonal part */
348   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
349   row  = row - diag;
350   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
351     if (garray[b->j[b->i[row]+l]] > diag) break;
352   }
353   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
354 
355   /* diagonal part */
356   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
357 
358   /* right of diagonal part */
359   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
360   PetscFunctionReturn(0);
361 }
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatSetValues_MPIAIJ"
365 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
366 {
367   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
368   PetscScalar    value;
369   PetscErrorCode ierr;
370   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
371   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
372   PetscBool      roworiented = aij->roworiented;
373 
374   /* Some Variables required in the macro */
375   Mat            A = aij->A;
376   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
377   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
378   MatScalar      *aa = a->a;
379   PetscBool      ignorezeroentries = a->ignorezeroentries;
380   Mat            B = aij->B;
381   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
382   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
383   MatScalar      *ba = b->a;
384 
385   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
386   PetscInt       nonew = a->nonew;
387   MatScalar      *ap1,*ap2;
388 
389   PetscFunctionBegin;
390   if (v) PetscValidScalarPointer(v,6);
391   for (i=0; i<m; i++) {
392     if (im[i] < 0) continue;
393 #if defined(PETSC_USE_DEBUG)
394     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
395 #endif
396     if (im[i] >= rstart && im[i] < rend) {
397       row      = im[i] - rstart;
398       lastcol1 = -1;
399       rp1      = aj + ai[row];
400       ap1      = aa + ai[row];
401       rmax1    = aimax[row];
402       nrow1    = ailen[row];
403       low1     = 0;
404       high1    = nrow1;
405       lastcol2 = -1;
406       rp2      = bj + bi[row];
407       ap2      = ba + bi[row];
408       rmax2    = bimax[row];
409       nrow2    = bilen[row];
410       low2     = 0;
411       high2    = nrow2;
412 
413       for (j=0; j<n; j++) {
414         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
415         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
416         if (in[j] >= cstart && in[j] < cend){
417           col = in[j] - cstart;
418           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
419         } else if (in[j] < 0) continue;
420 #if defined(PETSC_USE_DEBUG)
421         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
422 #endif
423         else {
424           if (mat->was_assembled) {
425             if (!aij->colmap) {
426               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
427             }
428 #if defined (PETSC_USE_CTABLE)
429             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
430 	    col--;
431 #else
432             col = aij->colmap[in[j]] - 1;
433 #endif
434             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
435               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
436               col =  in[j];
437               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
438               B = aij->B;
439               b = (Mat_SeqAIJ*)B->data;
440               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
441               rp2      = bj + bi[row];
442               ap2      = ba + bi[row];
443               rmax2    = bimax[row];
444               nrow2    = bilen[row];
445               low2     = 0;
446               high2    = nrow2;
447               bm       = aij->B->rmap->n;
448               ba = b->a;
449             }
450           } else col = in[j];
451           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
452         }
453       }
454     } else {
455       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
456       if (!aij->donotstash) {
457         if (roworiented) {
458           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
459         } else {
460           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
461         }
462       }
463     }
464   }
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatGetValues_MPIAIJ"
470 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
471 {
472   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
473   PetscErrorCode ierr;
474   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
475   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
476 
477   PetscFunctionBegin;
478   for (i=0; i<m; i++) {
479     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
480     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
481     if (idxm[i] >= rstart && idxm[i] < rend) {
482       row = idxm[i] - rstart;
483       for (j=0; j<n; j++) {
484         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
485         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
486         if (idxn[j] >= cstart && idxn[j] < cend){
487           col = idxn[j] - cstart;
488           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
489         } else {
490           if (!aij->colmap) {
491             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
492           }
493 #if defined (PETSC_USE_CTABLE)
494           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
495           col --;
496 #else
497           col = aij->colmap[idxn[j]] - 1;
498 #endif
499           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
500           else {
501             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
502           }
503         }
504       }
505     } else {
506       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
507     }
508   }
509   PetscFunctionReturn(0);
510 }
511 
512 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
516 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
517 {
518   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
519   PetscErrorCode ierr;
520   PetscInt       nstash,reallocs;
521   InsertMode     addv;
522 
523   PetscFunctionBegin;
524   if (aij->donotstash || mat->nooffprocentries) {
525     PetscFunctionReturn(0);
526   }
527 
528   /* make sure all processors are either in INSERTMODE or ADDMODE */
529   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
530   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
531   mat->insertmode = addv; /* in case this processor had no cache */
532 
533   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
534   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
535   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
541 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
542 {
543   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
544   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
545   PetscErrorCode ierr;
546   PetscMPIInt    n;
547   PetscInt       i,j,rstart,ncols,flg;
548   PetscInt       *row,*col;
549   PetscBool      other_disassembled;
550   PetscScalar    *val;
551   InsertMode     addv = mat->insertmode;
552 
553   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
554   PetscFunctionBegin;
555   if (!aij->donotstash && !mat->nooffprocentries) {
556     while (1) {
557       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
558       if (!flg) break;
559 
560       for (i=0; i<n;) {
561         /* Now identify the consecutive vals belonging to the same row */
562         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
563         if (j < n) ncols = j-i;
564         else       ncols = n-i;
565         /* Now assemble all these values with a single function call */
566         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
567         i = j;
568       }
569     }
570     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
571   }
572   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
573   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
574 
575   /* determine if any processor has disassembled, if so we must
576      also disassemble ourselfs, in order that we may reassemble. */
577   /*
578      if nonzero structure of submatrix B cannot change then we know that
579      no processor disassembled thus we can skip this stuff
580   */
581   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
582     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
583     if (mat->was_assembled && !other_disassembled) {
584       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
585     }
586   }
587   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
588     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
589   }
590   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
591   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
592   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
596   aij->rowvalues = 0;
597 
598   /* used by MatAXPY() */
599   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
600   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
601 
602   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
603   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
604   PetscFunctionReturn(0);
605 }
606 
607 #undef __FUNCT__
608 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
609 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
610 {
611   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
612   PetscErrorCode ierr;
613 
614   PetscFunctionBegin;
615   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
616   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
617   PetscFunctionReturn(0);
618 }
619 
620 #undef __FUNCT__
621 #define __FUNCT__ "MatZeroRows_MPIAIJ"
622 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
623 {
624   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
625   PetscErrorCode    ierr;
626   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
627   PetscInt          i,*owners = A->rmap->range;
628   PetscInt          *nprocs,j,idx,nsends,row;
629   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
630   PetscInt          *rvalues,count,base,slen,*source;
631   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
632   MPI_Comm          comm = ((PetscObject)A)->comm;
633   MPI_Request       *send_waits,*recv_waits;
634   MPI_Status        recv_status,*send_status;
635   const PetscScalar *xx;
636   PetscScalar       *bb;
637 #if defined(PETSC_DEBUG)
638   PetscBool      found = PETSC_FALSE;
639 #endif
640 
641   PetscFunctionBegin;
642   /*  first count number of contributors to each processor */
643   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
644   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
645   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
646   j = 0;
647   for (i=0; i<N; i++) {
648     if (lastidx > (idx = rows[i])) j = 0;
649     lastidx = idx;
650     for (; j<size; j++) {
651       if (idx >= owners[j] && idx < owners[j+1]) {
652         nprocs[2*j]++;
653         nprocs[2*j+1] = 1;
654         owner[i] = j;
655 #if defined(PETSC_DEBUG)
656         found = PETSC_TRUE;
657 #endif
658         break;
659       }
660     }
661 #if defined(PETSC_DEBUG)
662     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
663     found = PETSC_FALSE;
664 #endif
665   }
666   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
667 
668   if (A->nooffproczerorows) {
669     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
670     nrecvs = nsends;
671     nmax   = N;
672   } else {
673     /* inform other processors of number of messages and max length*/
674     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
675   }
676 
677   /* post receives:   */
678   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
679   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
680   for (i=0; i<nrecvs; i++) {
681     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
682   }
683 
684   /* do sends:
685       1) starts[i] gives the starting index in svalues for stuff going to
686          the ith processor
687   */
688   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
689   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
690   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
691   starts[0] = 0;
692   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
693   for (i=0; i<N; i++) {
694     svalues[starts[owner[i]]++] = rows[i];
695   }
696 
697   starts[0] = 0;
698   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
699   count = 0;
700   for (i=0; i<size; i++) {
701     if (nprocs[2*i+1]) {
702       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
703     }
704   }
705   ierr = PetscFree(starts);CHKERRQ(ierr);
706 
707   base = owners[rank];
708 
709   /*  wait on receives */
710   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
711   count  = nrecvs; slen = 0;
712   while (count) {
713     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
714     /* unpack receives into our local space */
715     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
716     source[imdex]  = recv_status.MPI_SOURCE;
717     lens[imdex]    = n;
718     slen          += n;
719     count--;
720   }
721   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
722 
723   /* move the data into the send scatter */
724   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
725   count = 0;
726   for (i=0; i<nrecvs; i++) {
727     values = rvalues + i*nmax;
728     for (j=0; j<lens[i]; j++) {
729       lrows[count++] = values[j] - base;
730     }
731   }
732   ierr = PetscFree(rvalues);CHKERRQ(ierr);
733   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
734   ierr = PetscFree(owner);CHKERRQ(ierr);
735   ierr = PetscFree(nprocs);CHKERRQ(ierr);
736 
737   /* fix right hand side if needed */
738   if (x && b) {
739     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
740     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
741     for (i=0; i<slen; i++) {
742       bb[lrows[i]] = diag*xx[lrows[i]];
743     }
744     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
745     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
746   }
747   /*
748         Zero the required rows. If the "diagonal block" of the matrix
749      is square and the user wishes to set the diagonal we use separate
750      code so that MatSetValues() is not called for each diagonal allocating
751      new memory, thus calling lots of mallocs and slowing things down.
752 
753   */
754   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
755   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
756   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
757     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
758   } else if (diag != 0.0) {
759     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
760     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
761       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
762 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
763     }
764     for (i = 0; i < slen; i++) {
765       row  = lrows[i] + rstart;
766       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
767     }
768     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
769     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
770   } else {
771     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
772   }
773   ierr = PetscFree(lrows);CHKERRQ(ierr);
774 
775   /* wait on sends */
776   if (nsends) {
777     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
778     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
779     ierr = PetscFree(send_status);CHKERRQ(ierr);
780   }
781   ierr = PetscFree(send_waits);CHKERRQ(ierr);
782   ierr = PetscFree(svalues);CHKERRQ(ierr);
783 
784   PetscFunctionReturn(0);
785 }
786 
787 #undef __FUNCT__
788 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
789 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
790 {
791   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
792   PetscErrorCode    ierr;
793   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
794   PetscInt          i,*owners = A->rmap->range;
795   PetscInt          *nprocs,j,idx,nsends;
796   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
797   PetscInt          *rvalues,count,base,slen,*source;
798   PetscInt          *lens,*lrows,*values,m;
799   MPI_Comm          comm = ((PetscObject)A)->comm;
800   MPI_Request       *send_waits,*recv_waits;
801   MPI_Status        recv_status,*send_status;
802   const PetscScalar *xx;
803   PetscScalar       *bb,*mask;
804   Vec               xmask,lmask;
805   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
806   const PetscInt    *aj, *ii,*ridx;
807   PetscScalar       *aa;
808 #if defined(PETSC_DEBUG)
809   PetscBool         found = PETSC_FALSE;
810 #endif
811 
812   PetscFunctionBegin;
813   /*  first count number of contributors to each processor */
814   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
815   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
816   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
817   j = 0;
818   for (i=0; i<N; i++) {
819     if (lastidx > (idx = rows[i])) j = 0;
820     lastidx = idx;
821     for (; j<size; j++) {
822       if (idx >= owners[j] && idx < owners[j+1]) {
823         nprocs[2*j]++;
824         nprocs[2*j+1] = 1;
825         owner[i] = j;
826 #if defined(PETSC_DEBUG)
827         found = PETSC_TRUE;
828 #endif
829         break;
830       }
831     }
832 #if defined(PETSC_DEBUG)
833     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
834     found = PETSC_FALSE;
835 #endif
836   }
837   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
838 
839   /* inform other processors of number of messages and max length*/
840   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
841 
842   /* post receives:   */
843   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
844   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
845   for (i=0; i<nrecvs; i++) {
846     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
847   }
848 
849   /* do sends:
850       1) starts[i] gives the starting index in svalues for stuff going to
851          the ith processor
852   */
853   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
854   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
855   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
856   starts[0] = 0;
857   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
858   for (i=0; i<N; i++) {
859     svalues[starts[owner[i]]++] = rows[i];
860   }
861 
862   starts[0] = 0;
863   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
864   count = 0;
865   for (i=0; i<size; i++) {
866     if (nprocs[2*i+1]) {
867       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
868     }
869   }
870   ierr = PetscFree(starts);CHKERRQ(ierr);
871 
872   base = owners[rank];
873 
874   /*  wait on receives */
875   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
876   count  = nrecvs; slen = 0;
877   while (count) {
878     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
879     /* unpack receives into our local space */
880     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
881     source[imdex]  = recv_status.MPI_SOURCE;
882     lens[imdex]    = n;
883     slen          += n;
884     count--;
885   }
886   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
887 
888   /* move the data into the send scatter */
889   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
890   count = 0;
891   for (i=0; i<nrecvs; i++) {
892     values = rvalues + i*nmax;
893     for (j=0; j<lens[i]; j++) {
894       lrows[count++] = values[j] - base;
895     }
896   }
897   ierr = PetscFree(rvalues);CHKERRQ(ierr);
898   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
899   ierr = PetscFree(owner);CHKERRQ(ierr);
900   ierr = PetscFree(nprocs);CHKERRQ(ierr);
901   /* lrows are the local rows to be zeroed, slen is the number of local rows */
902 
903   /* zero diagonal part of matrix */
904   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
905 
906   /* handle off diagonal part of matrix */
907   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
908   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
909   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
910   for (i=0; i<slen; i++) {
911     bb[lrows[i]] = 1;
912   }
913   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
914   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
917   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
918   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
921   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
922 
923   /* remove zeroed rows of off diagonal matrix */
924   ii = aij->i;
925   for (i=0; i<slen; i++) {
926     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
927   }
928 
929   /* loop over all elements of off process part of matrix zeroing removed columns*/
930   if (aij->compressedrow.use){
931     m    = aij->compressedrow.nrows;
932     ii   = aij->compressedrow.i;
933     ridx = aij->compressedrow.rindex;
934     for (i=0; i<m; i++){
935       n   = ii[i+1] - ii[i];
936       aj  = aij->j + ii[i];
937       aa  = aij->a + ii[i];
938 
939       for (j=0; j<n; j++) {
940         if (PetscAbsScalar(mask[*aj])) {
941           bb[*ridx] -= *aa*xx[*aj];
942           *aa        = 0.0;
943         }
944         aa++;
945         aj++;
946       }
947       ridx++;
948     }
949   } else { /* do not use compressed row format */
950     m = l->B->rmap->n;
951    for (i=0; i<m; i++) {
952      n   = ii[i+1] - ii[i];
953      aj  = aij->j + ii[i];
954      aa  = aij->a + ii[i];
955      for (j=0; j<n; j++) {
956         if (PetscAbsScalar(mask[*aj])) {
957           bb[i] -= *aa*xx[*aj];
958           *aa    = 0.0;
959         }
960         aa++;
961         aj++;
962       }
963     }
964   }
965 
966   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
967   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
968   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
969   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
970   ierr = PetscFree(lrows);CHKERRQ(ierr);
971 
972   /* wait on sends */
973   if (nsends) {
974     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
975     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
976     ierr = PetscFree(send_status);CHKERRQ(ierr);
977   }
978   ierr = PetscFree(send_waits);CHKERRQ(ierr);
979   ierr = PetscFree(svalues);CHKERRQ(ierr);
980 
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMult_MPIAIJ"
986 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
987 {
988   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
989   PetscErrorCode ierr;
990   PetscInt       nt;
991 
992   PetscFunctionBegin;
993   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
994   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
995   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
997   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
998   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1004 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1005 {
1006   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1016 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1017 {
1018   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1019   PetscErrorCode ierr;
1020 
1021   PetscFunctionBegin;
1022   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1024   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1031 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1032 {
1033   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1034   PetscErrorCode ierr;
1035   PetscBool      merged;
1036 
1037   PetscFunctionBegin;
1038   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1039   /* do nondiagonal part */
1040   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1041   if (!merged) {
1042     /* send it on its way */
1043     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1044     /* do local part */
1045     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1046     /* receive remote parts: note this assumes the values are not actually */
1047     /* added in yy until the next line, */
1048     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1049   } else {
1050     /* do local part */
1051     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1052     /* send it on its way */
1053     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054     /* values actually were received in the Begin() but we need to call this nop */
1055     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056   }
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 EXTERN_C_BEGIN
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1063 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1064 {
1065   MPI_Comm       comm;
1066   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1067   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1068   IS             Me,Notme;
1069   PetscErrorCode ierr;
1070   PetscInt       M,N,first,last,*notme,i;
1071   PetscMPIInt    size;
1072 
1073   PetscFunctionBegin;
1074 
1075   /* Easy test: symmetric diagonal block */
1076   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1077   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1078   if (!*f) PetscFunctionReturn(0);
1079   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1080   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1081   if (size == 1) PetscFunctionReturn(0);
1082 
1083   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1084   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1085   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1086   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1087   for (i=0; i<first; i++) notme[i] = i;
1088   for (i=last; i<M; i++) notme[i-last+first] = i;
1089   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1090   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1091   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1092   Aoff = Aoffs[0];
1093   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1094   Boff = Boffs[0];
1095   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1096   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1097   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1098   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1099   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 EXTERN_C_END
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1106 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1107 {
1108   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   /* do nondiagonal part */
1113   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1114   /* send it on its way */
1115   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1116   /* do local part */
1117   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1118   /* receive remote parts */
1119   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 /*
1124   This only works correctly for square matrices where the subblock A->A is the
1125    diagonal block
1126 */
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1129 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1130 {
1131   PetscErrorCode ierr;
1132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1133 
1134   PetscFunctionBegin;
1135   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1136   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1137   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatScale_MPIAIJ"
1143 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1144 {
1145   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1146   PetscErrorCode ierr;
1147 
1148   PetscFunctionBegin;
1149   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1150   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatDestroy_MPIAIJ"
1156 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1157 {
1158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1159   PetscErrorCode ierr;
1160 
1161   PetscFunctionBegin;
1162 #if defined(PETSC_USE_LOG)
1163   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1164 #endif
1165   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1166   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1167   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1168   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1169 #if defined (PETSC_USE_CTABLE)
1170   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
1171 #else
1172   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1173 #endif
1174   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1175   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1176   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1177   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1178   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1179   ierr = PetscFree(aij);CHKERRQ(ierr);
1180 
1181   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1195 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1196 {
1197   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1198   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1199   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1200   PetscErrorCode    ierr;
1201   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1202   int               fd;
1203   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1204   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1205   PetscScalar       *column_values;
1206   PetscInt          message_count,flowcontrolcount;
1207 
1208   PetscFunctionBegin;
1209   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1210   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1211   nz   = A->nz + B->nz;
1212   if (!rank) {
1213     header[0] = MAT_FILE_CLASSID;
1214     header[1] = mat->rmap->N;
1215     header[2] = mat->cmap->N;
1216     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1217     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1218     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1219     /* get largest number of rows any processor has */
1220     rlen = mat->rmap->n;
1221     range = mat->rmap->range;
1222     for (i=1; i<size; i++) {
1223       rlen = PetscMax(rlen,range[i+1] - range[i]);
1224     }
1225   } else {
1226     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1227     rlen = mat->rmap->n;
1228   }
1229 
1230   /* load up the local row counts */
1231   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1232   for (i=0; i<mat->rmap->n; i++) {
1233     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1234   }
1235 
1236   /* store the row lengths to the file */
1237   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1238   if (!rank) {
1239     MPI_Status status;
1240     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1241     for (i=1; i<size; i++) {
1242       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1243       rlen = range[i+1] - range[i];
1244       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1245       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1246     }
1247     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1248   } else {
1249     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1250     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1251     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1254 
1255   /* load up the local column indices */
1256   nzmax = nz; /* )th processor needs space a largest processor needs */
1257   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1258   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1259   cnt  = 0;
1260   for (i=0; i<mat->rmap->n; i++) {
1261     for (j=B->i[i]; j<B->i[i+1]; j++) {
1262       if ( (col = garray[B->j[j]]) > cstart) break;
1263       column_indices[cnt++] = col;
1264     }
1265     for (k=A->i[i]; k<A->i[i+1]; k++) {
1266       column_indices[cnt++] = A->j[k] + cstart;
1267     }
1268     for (; j<B->i[i+1]; j++) {
1269       column_indices[cnt++] = garray[B->j[j]];
1270     }
1271   }
1272   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1273 
1274   /* store the column indices to the file */
1275    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1276   if (!rank) {
1277     MPI_Status status;
1278     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1279     for (i=1; i<size; i++) {
1280       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1281       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1282       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1283       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1284       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1285     }
1286      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1287   } else {
1288     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1289     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1290     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1291     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1292   }
1293   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1294 
1295   /* load up the local column values */
1296   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1297   cnt  = 0;
1298   for (i=0; i<mat->rmap->n; i++) {
1299     for (j=B->i[i]; j<B->i[i+1]; j++) {
1300       if ( garray[B->j[j]] > cstart) break;
1301       column_values[cnt++] = B->a[j];
1302     }
1303     for (k=A->i[i]; k<A->i[i+1]; k++) {
1304       column_values[cnt++] = A->a[k];
1305     }
1306     for (; j<B->i[i+1]; j++) {
1307       column_values[cnt++] = B->a[j];
1308     }
1309   }
1310   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1311 
1312   /* store the column values to the file */
1313    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1314   if (!rank) {
1315     MPI_Status status;
1316     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1317     for (i=1; i<size; i++) {
1318        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1319       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1320       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1321       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1322       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1323     }
1324     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1325   } else {
1326     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1327     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1328     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1329     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1330   }
1331   ierr = PetscFree(column_values);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1337 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1338 {
1339   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1340   PetscErrorCode    ierr;
1341   PetscMPIInt       rank = aij->rank,size = aij->size;
1342   PetscBool         isdraw,iascii,isbinary;
1343   PetscViewer       sviewer;
1344   PetscViewerFormat format;
1345 
1346   PetscFunctionBegin;
1347   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1348   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1349   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1350   if (iascii) {
1351     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1352     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1353       MatInfo    info;
1354       PetscBool  inodes;
1355 
1356       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1357       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1358       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1359       if (!inodes) {
1360         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1361 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1362       } else {
1363         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1364 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1365       }
1366       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1367       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1368       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1369       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1370       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1371       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1372       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1373       PetscFunctionReturn(0);
1374     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1375       PetscInt   inodecount,inodelimit,*inodes;
1376       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1377       if (inodes) {
1378         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1379       } else {
1380         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1381       }
1382       PetscFunctionReturn(0);
1383     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1384       PetscFunctionReturn(0);
1385     }
1386   } else if (isbinary) {
1387     if (size == 1) {
1388       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1389       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1390     } else {
1391       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1392     }
1393     PetscFunctionReturn(0);
1394   } else if (isdraw) {
1395     PetscDraw  draw;
1396     PetscBool  isnull;
1397     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1398     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1399   }
1400 
1401   if (size == 1) {
1402     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1403     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1404   } else {
1405     /* assemble the entire matrix onto first processor. */
1406     Mat         A;
1407     Mat_SeqAIJ  *Aloc;
1408     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1409     MatScalar   *a;
1410 
1411     if (mat->rmap->N > 1024) {
1412       PetscBool  flg = PETSC_FALSE;
1413 
1414       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1415       if (!flg) {
1416         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1417       }
1418     }
1419 
1420     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1421     if (!rank) {
1422       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1423     } else {
1424       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1425     }
1426     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1427     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1428     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1429     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1430 
1431     /* copy over the A part */
1432     Aloc = (Mat_SeqAIJ*)aij->A->data;
1433     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1434     row = mat->rmap->rstart;
1435     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1436     for (i=0; i<m; i++) {
1437       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1438       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1439     }
1440     aj = Aloc->j;
1441     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1442 
1443     /* copy over the B part */
1444     Aloc = (Mat_SeqAIJ*)aij->B->data;
1445     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1446     row  = mat->rmap->rstart;
1447     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1448     ct   = cols;
1449     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1450     for (i=0; i<m; i++) {
1451       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1452       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1453     }
1454     ierr = PetscFree(ct);CHKERRQ(ierr);
1455     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1456     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1457     /*
1458        Everyone has to call to draw the matrix since the graphics waits are
1459        synchronized across all processors that share the PetscDraw object
1460     */
1461     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1462     if (!rank) {
1463       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1464       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1465       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1466       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1467     }
1468     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1469     ierr = MatDestroy(&A);CHKERRQ(ierr);
1470   }
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatView_MPIAIJ"
1476 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1477 {
1478   PetscErrorCode ierr;
1479   PetscBool      iascii,isdraw,issocket,isbinary;
1480 
1481   PetscFunctionBegin;
1482   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1483   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1484   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1485   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1486   if (iascii || isdraw || isbinary || issocket) {
1487     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1488   } else {
1489     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1490   }
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatSOR_MPIAIJ"
1496 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1497 {
1498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1499   PetscErrorCode ierr;
1500   Vec            bb1 = 0;
1501   PetscBool      hasop;
1502 
1503   PetscFunctionBegin;
1504   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1505     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1506   }
1507 
1508   if (flag == SOR_APPLY_UPPER) {
1509     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1510     PetscFunctionReturn(0);
1511   }
1512 
1513   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1514     if (flag & SOR_ZERO_INITIAL_GUESS) {
1515       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1516       its--;
1517     }
1518 
1519     while (its--) {
1520       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1521       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522 
1523       /* update rhs: bb1 = bb - B*x */
1524       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1525       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1526 
1527       /* local sweep */
1528       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1529     }
1530   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1531     if (flag & SOR_ZERO_INITIAL_GUESS) {
1532       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1533       its--;
1534     }
1535     while (its--) {
1536       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1537       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1538 
1539       /* update rhs: bb1 = bb - B*x */
1540       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1541       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1542 
1543       /* local sweep */
1544       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1545     }
1546   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1547     if (flag & SOR_ZERO_INITIAL_GUESS) {
1548       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1549       its--;
1550     }
1551     while (its--) {
1552       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1554 
1555       /* update rhs: bb1 = bb - B*x */
1556       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1557       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1558 
1559       /* local sweep */
1560       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1561     }
1562   }  else if (flag & SOR_EISENSTAT) {
1563     Vec         xx1;
1564 
1565     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1566     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1567 
1568     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1570     if (!mat->diag) {
1571       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1572       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1573     }
1574     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1575     if (hasop) {
1576       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1577     } else {
1578       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1579     }
1580     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1581 
1582     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1583 
1584     /* local sweep */
1585     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1586     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1587     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1588   } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1589 
1590   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1591   PetscFunctionReturn(0);
1592 }
1593 
1594 #undef __FUNCT__
1595 #define __FUNCT__ "MatPermute_MPIAIJ"
1596 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1597 {
1598   MPI_Comm       comm,pcomm;
1599   PetscInt       first,local_size,nrows;
1600   const PetscInt *rows;
1601   PetscMPIInt    size;
1602   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1603   PetscErrorCode ierr;
1604 
1605   PetscFunctionBegin;
1606   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1607   /* make a collective version of 'rowp' */
1608   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1609   if (pcomm==comm) {
1610     crowp = rowp;
1611   } else {
1612     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1613     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1614     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1615     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1616   }
1617   /* collect the global row permutation and invert it */
1618   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1619   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1620   if (pcomm!=comm) {
1621     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1622   }
1623   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1624   /* get the local target indices */
1625   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1626   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1627   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1628   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1629   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1630   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1631   /* the column permutation is so much easier;
1632      make a local version of 'colp' and invert it */
1633   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1634   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1635   if (size==1) {
1636     lcolp = colp;
1637   } else {
1638     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1639     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1640     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1641   }
1642   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1643   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1644   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1645   if (size>1) {
1646     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1647     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1648   }
1649   /* now we just get the submatrix */
1650   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1651   /* clean up */
1652   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1653   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1659 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1660 {
1661   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1662   Mat            A = mat->A,B = mat->B;
1663   PetscErrorCode ierr;
1664   PetscReal      isend[5],irecv[5];
1665 
1666   PetscFunctionBegin;
1667   info->block_size     = 1.0;
1668   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1669   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1670   isend[3] = info->memory;  isend[4] = info->mallocs;
1671   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1672   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1673   isend[3] += info->memory;  isend[4] += info->mallocs;
1674   if (flag == MAT_LOCAL) {
1675     info->nz_used      = isend[0];
1676     info->nz_allocated = isend[1];
1677     info->nz_unneeded  = isend[2];
1678     info->memory       = isend[3];
1679     info->mallocs      = isend[4];
1680   } else if (flag == MAT_GLOBAL_MAX) {
1681     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1682     info->nz_used      = irecv[0];
1683     info->nz_allocated = irecv[1];
1684     info->nz_unneeded  = irecv[2];
1685     info->memory       = irecv[3];
1686     info->mallocs      = irecv[4];
1687   } else if (flag == MAT_GLOBAL_SUM) {
1688     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1689     info->nz_used      = irecv[0];
1690     info->nz_allocated = irecv[1];
1691     info->nz_unneeded  = irecv[2];
1692     info->memory       = irecv[3];
1693     info->mallocs      = irecv[4];
1694   }
1695   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1696   info->fill_ratio_needed = 0;
1697   info->factor_mallocs    = 0;
1698 
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatSetOption_MPIAIJ"
1704 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1705 {
1706   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1707   PetscErrorCode ierr;
1708 
1709   PetscFunctionBegin;
1710   switch (op) {
1711   case MAT_NEW_NONZERO_LOCATIONS:
1712   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1713   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1714   case MAT_KEEP_NONZERO_PATTERN:
1715   case MAT_NEW_NONZERO_LOCATION_ERR:
1716   case MAT_USE_INODES:
1717   case MAT_IGNORE_ZERO_ENTRIES:
1718     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1719     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1720     break;
1721   case MAT_ROW_ORIENTED:
1722     a->roworiented = flg;
1723     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1724     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1725     break;
1726   case MAT_NEW_DIAGONALS:
1727     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1728     break;
1729   case MAT_IGNORE_OFF_PROC_ENTRIES:
1730     a->donotstash = PETSC_TRUE;
1731     break;
1732   case MAT_SPD:
1733     A->spd_set                         = PETSC_TRUE;
1734     A->spd                             = flg;
1735     if (flg) {
1736       A->symmetric                     = PETSC_TRUE;
1737       A->structurally_symmetric        = PETSC_TRUE;
1738       A->symmetric_set                 = PETSC_TRUE;
1739       A->structurally_symmetric_set    = PETSC_TRUE;
1740     }
1741     break;
1742   case MAT_SYMMETRIC:
1743     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1744     break;
1745   case MAT_STRUCTURALLY_SYMMETRIC:
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     break;
1748   case MAT_HERMITIAN:
1749     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1750     break;
1751   case MAT_SYMMETRY_ETERNAL:
1752     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1753     break;
1754   default:
1755     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatGetRow_MPIAIJ"
1762 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1763 {
1764   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1765   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1766   PetscErrorCode ierr;
1767   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1768   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1769   PetscInt       *cmap,*idx_p;
1770 
1771   PetscFunctionBegin;
1772   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1773   mat->getrowactive = PETSC_TRUE;
1774 
1775   if (!mat->rowvalues && (idx || v)) {
1776     /*
1777         allocate enough space to hold information from the longest row.
1778     */
1779     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1780     PetscInt   max = 1,tmp;
1781     for (i=0; i<matin->rmap->n; i++) {
1782       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1783       if (max < tmp) { max = tmp; }
1784     }
1785     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1786   }
1787 
1788   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1789   lrow = row - rstart;
1790 
1791   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1792   if (!v)   {pvA = 0; pvB = 0;}
1793   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1794   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1795   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1796   nztot = nzA + nzB;
1797 
1798   cmap  = mat->garray;
1799   if (v  || idx) {
1800     if (nztot) {
1801       /* Sort by increasing column numbers, assuming A and B already sorted */
1802       PetscInt imark = -1;
1803       if (v) {
1804         *v = v_p = mat->rowvalues;
1805         for (i=0; i<nzB; i++) {
1806           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1807           else break;
1808         }
1809         imark = i;
1810         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1811         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1812       }
1813       if (idx) {
1814         *idx = idx_p = mat->rowindices;
1815         if (imark > -1) {
1816           for (i=0; i<imark; i++) {
1817             idx_p[i] = cmap[cworkB[i]];
1818           }
1819         } else {
1820           for (i=0; i<nzB; i++) {
1821             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1822             else break;
1823           }
1824           imark = i;
1825         }
1826         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1827         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1828       }
1829     } else {
1830       if (idx) *idx = 0;
1831       if (v)   *v   = 0;
1832     }
1833   }
1834   *nz = nztot;
1835   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1836   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1842 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1843 {
1844   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1845 
1846   PetscFunctionBegin;
1847   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1848   aij->getrowactive = PETSC_FALSE;
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 #undef __FUNCT__
1853 #define __FUNCT__ "MatNorm_MPIAIJ"
1854 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1855 {
1856   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1857   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1858   PetscErrorCode ierr;
1859   PetscInt       i,j,cstart = mat->cmap->rstart;
1860   PetscReal      sum = 0.0;
1861   MatScalar      *v;
1862 
1863   PetscFunctionBegin;
1864   if (aij->size == 1) {
1865     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1866   } else {
1867     if (type == NORM_FROBENIUS) {
1868       v = amat->a;
1869       for (i=0; i<amat->nz; i++) {
1870 #if defined(PETSC_USE_COMPLEX)
1871         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1872 #else
1873         sum += (*v)*(*v); v++;
1874 #endif
1875       }
1876       v = bmat->a;
1877       for (i=0; i<bmat->nz; i++) {
1878 #if defined(PETSC_USE_COMPLEX)
1879         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1880 #else
1881         sum += (*v)*(*v); v++;
1882 #endif
1883       }
1884       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1885       *norm = sqrt(*norm);
1886     } else if (type == NORM_1) { /* max column norm */
1887       PetscReal *tmp,*tmp2;
1888       PetscInt  *jj,*garray = aij->garray;
1889       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1890       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1891       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1892       *norm = 0.0;
1893       v = amat->a; jj = amat->j;
1894       for (j=0; j<amat->nz; j++) {
1895         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1896       }
1897       v = bmat->a; jj = bmat->j;
1898       for (j=0; j<bmat->nz; j++) {
1899         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1900       }
1901       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1902       for (j=0; j<mat->cmap->N; j++) {
1903         if (tmp2[j] > *norm) *norm = tmp2[j];
1904       }
1905       ierr = PetscFree(tmp);CHKERRQ(ierr);
1906       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1907     } else if (type == NORM_INFINITY) { /* max row norm */
1908       PetscReal ntemp = 0.0;
1909       for (j=0; j<aij->A->rmap->n; j++) {
1910         v = amat->a + amat->i[j];
1911         sum = 0.0;
1912         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1913           sum += PetscAbsScalar(*v); v++;
1914         }
1915         v = bmat->a + bmat->i[j];
1916         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1917           sum += PetscAbsScalar(*v); v++;
1918         }
1919         if (sum > ntemp) ntemp = sum;
1920       }
1921       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1922     } else {
1923       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1924     }
1925   }
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatTranspose_MPIAIJ"
1931 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1932 {
1933   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1934   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1935   PetscErrorCode ierr;
1936   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1937   PetscInt       cstart=A->cmap->rstart,ncol;
1938   Mat            B;
1939   MatScalar      *array;
1940 
1941   PetscFunctionBegin;
1942   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1943 
1944   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1945   ai = Aloc->i; aj = Aloc->j;
1946   bi = Bloc->i; bj = Bloc->j;
1947   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1948     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1949     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1950     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1951     for (i=0; i<ai[ma]; i++){
1952       d_nnz[aj[i]] ++;
1953       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1954     }
1955 
1956     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1957     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1958     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1959     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1960     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1961   } else {
1962     B = *matout;
1963   }
1964 
1965   /* copy over the A part */
1966   array = Aloc->a;
1967   row = A->rmap->rstart;
1968   for (i=0; i<ma; i++) {
1969     ncol = ai[i+1]-ai[i];
1970     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1971     row++; array += ncol; aj += ncol;
1972   }
1973   aj = Aloc->j;
1974   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1975 
1976   /* copy over the B part */
1977   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1978   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1979   array = Bloc->a;
1980   row = A->rmap->rstart;
1981   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1982   cols_tmp = cols;
1983   for (i=0; i<mb; i++) {
1984     ncol = bi[i+1]-bi[i];
1985     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1986     row++; array += ncol; cols_tmp += ncol;
1987   }
1988   ierr = PetscFree(cols);CHKERRQ(ierr);
1989 
1990   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1991   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1992   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1993     *matout = B;
1994   } else {
1995     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1996   }
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2002 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2003 {
2004   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2005   Mat            a = aij->A,b = aij->B;
2006   PetscErrorCode ierr;
2007   PetscInt       s1,s2,s3;
2008 
2009   PetscFunctionBegin;
2010   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2011   if (rr) {
2012     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2013     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2014     /* Overlap communication with computation. */
2015     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2016   }
2017   if (ll) {
2018     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2019     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2020     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2021   }
2022   /* scale  the diagonal block */
2023   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2024 
2025   if (rr) {
2026     /* Do a scatter end and then right scale the off-diagonal block */
2027     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2028     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2029   }
2030 
2031   PetscFunctionReturn(0);
2032 }
2033 
2034 #undef __FUNCT__
2035 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2036 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2037 {
2038   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2039   PetscErrorCode ierr;
2040 
2041   PetscFunctionBegin;
2042   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2043   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2044   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2045   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2046   PetscFunctionReturn(0);
2047 }
2048 #undef __FUNCT__
2049 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2050 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2051 {
2052   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2053   PetscErrorCode ierr;
2054 
2055   PetscFunctionBegin;
2056   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2057   PetscFunctionReturn(0);
2058 }
2059 
2060 #undef __FUNCT__
2061 #define __FUNCT__ "MatEqual_MPIAIJ"
2062 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2063 {
2064   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2065   Mat            a,b,c,d;
2066   PetscBool      flg;
2067   PetscErrorCode ierr;
2068 
2069   PetscFunctionBegin;
2070   a = matA->A; b = matA->B;
2071   c = matB->A; d = matB->B;
2072 
2073   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2074   if (flg) {
2075     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2076   }
2077   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 #undef __FUNCT__
2082 #define __FUNCT__ "MatCopy_MPIAIJ"
2083 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2084 {
2085   PetscErrorCode ierr;
2086   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2087   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2088 
2089   PetscFunctionBegin;
2090   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2091   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2092     /* because of the column compression in the off-processor part of the matrix a->B,
2093        the number of columns in a->B and b->B may be different, hence we cannot call
2094        the MatCopy() directly on the two parts. If need be, we can provide a more
2095        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2096        then copying the submatrices */
2097     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2098   } else {
2099     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2100     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2101   }
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 #undef __FUNCT__
2106 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2107 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2108 {
2109   PetscErrorCode ierr;
2110 
2111   PetscFunctionBegin;
2112   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2113   PetscFunctionReturn(0);
2114 }
2115 
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatAXPY_MPIAIJ"
2118 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2119 {
2120   PetscErrorCode ierr;
2121   PetscInt       i;
2122   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2123   PetscBLASInt   bnz,one=1;
2124   Mat_SeqAIJ     *x,*y;
2125 
2126   PetscFunctionBegin;
2127   if (str == SAME_NONZERO_PATTERN) {
2128     PetscScalar alpha = a;
2129     x = (Mat_SeqAIJ *)xx->A->data;
2130     y = (Mat_SeqAIJ *)yy->A->data;
2131     bnz = PetscBLASIntCast(x->nz);
2132     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2133     x = (Mat_SeqAIJ *)xx->B->data;
2134     y = (Mat_SeqAIJ *)yy->B->data;
2135     bnz = PetscBLASIntCast(x->nz);
2136     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2137   } else if (str == SUBSET_NONZERO_PATTERN) {
2138     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2139 
2140     x = (Mat_SeqAIJ *)xx->B->data;
2141     y = (Mat_SeqAIJ *)yy->B->data;
2142     if (y->xtoy && y->XtoY != xx->B) {
2143       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2144       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2145     }
2146     if (!y->xtoy) { /* get xtoy */
2147       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2148       y->XtoY = xx->B;
2149       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2150     }
2151     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2152   } else {
2153     Mat B;
2154     PetscInt *nnz_d,*nnz_o;
2155     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2156     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2157     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2158     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2159     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2160     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2161     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2162     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2163     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2164     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2165     ierr = MatHeaderReplace(Y,B);
2166     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2167     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2168   }
2169   PetscFunctionReturn(0);
2170 }
2171 
2172 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatConjugate_MPIAIJ"
2176 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2177 {
2178 #if defined(PETSC_USE_COMPLEX)
2179   PetscErrorCode ierr;
2180   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2181 
2182   PetscFunctionBegin;
2183   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2184   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2185 #else
2186   PetscFunctionBegin;
2187 #endif
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatRealPart_MPIAIJ"
2193 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2194 {
2195   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2196   PetscErrorCode ierr;
2197 
2198   PetscFunctionBegin;
2199   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2200   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2206 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2207 {
2208   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2209   PetscErrorCode ierr;
2210 
2211   PetscFunctionBegin;
2212   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2213   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 #ifdef PETSC_HAVE_PBGL
2218 
2219 #include <boost/parallel/mpi/bsp_process_group.hpp>
2220 #include <boost/graph/distributed/ilu_default_graph.hpp>
2221 #include <boost/graph/distributed/ilu_0_block.hpp>
2222 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2223 #include <boost/graph/distributed/petsc/interface.hpp>
2224 #include <boost/multi_array.hpp>
2225 #include <boost/parallel/distributed_property_map->hpp>
2226 
2227 #undef __FUNCT__
2228 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2229 /*
2230   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2231 */
2232 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2233 {
2234   namespace petsc = boost::distributed::petsc;
2235 
2236   namespace graph_dist = boost::graph::distributed;
2237   using boost::graph::distributed::ilu_default::process_group_type;
2238   using boost::graph::ilu_permuted;
2239 
2240   PetscBool       row_identity, col_identity;
2241   PetscContainer  c;
2242   PetscInt        m, n, M, N;
2243   PetscErrorCode  ierr;
2244 
2245   PetscFunctionBegin;
2246   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2247   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2248   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2249   if (!row_identity || !col_identity) {
2250     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2251   }
2252 
2253   process_group_type pg;
2254   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2255   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2256   lgraph_type&   level_graph = *lgraph_p;
2257   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2258 
2259   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2260   ilu_permuted(level_graph);
2261 
2262   /* put together the new matrix */
2263   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2264   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2265   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2266   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2267   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2268   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2269   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2270 
2271   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2272   ierr = PetscContainerSetPointer(c, lgraph_p);
2273   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 #undef __FUNCT__
2278 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2279 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2280 {
2281   PetscFunctionBegin;
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 #undef __FUNCT__
2286 #define __FUNCT__ "MatSolve_MPIAIJ"
2287 /*
2288   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2289 */
2290 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2291 {
2292   namespace graph_dist = boost::graph::distributed;
2293 
2294   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2295   lgraph_type*   lgraph_p;
2296   PetscContainer c;
2297   PetscErrorCode ierr;
2298 
2299   PetscFunctionBegin;
2300   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2301   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2302   ierr = VecCopy(b, x);CHKERRQ(ierr);
2303 
2304   PetscScalar* array_x;
2305   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2306   PetscInt sx;
2307   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2308 
2309   PetscScalar* array_b;
2310   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2311   PetscInt sb;
2312   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2313 
2314   lgraph_type&   level_graph = *lgraph_p;
2315   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2316 
2317   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2318   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2319                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2320 
2321   typedef boost::iterator_property_map<array_ref_type::iterator,
2322                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2323   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2324                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2325 
2326   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2327 
2328   PetscFunctionReturn(0);
2329 }
2330 #endif
2331 
2332 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2333   PetscInt       nzlocal,nsends,nrecvs;
2334   PetscMPIInt    *send_rank,*recv_rank;
2335   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2336   PetscScalar    *sbuf_a,**rbuf_a;
2337   PetscErrorCode (*MatDestroy)(Mat);
2338 } Mat_Redundant;
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2342 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2343 {
2344   PetscErrorCode       ierr;
2345   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2346   PetscInt             i;
2347 
2348   PetscFunctionBegin;
2349   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2350   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2351   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2352   for (i=0; i<redund->nrecvs; i++){
2353     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2354     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2355   }
2356   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2357   ierr = PetscFree(redund);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 #undef __FUNCT__
2362 #define __FUNCT__ "MatDestroy_MatRedundant"
2363 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2364 {
2365   PetscErrorCode  ierr;
2366   PetscContainer  container;
2367   Mat_Redundant   *redund=PETSC_NULL;
2368 
2369   PetscFunctionBegin;
2370   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2371   if (container) {
2372     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2373   } else {
2374     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2375   }
2376   A->ops->destroy = redund->MatDestroy;
2377   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2378   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2379   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2385 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2386 {
2387   PetscMPIInt    rank,size;
2388   MPI_Comm       comm=((PetscObject)mat)->comm;
2389   PetscErrorCode ierr;
2390   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2391   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2392   PetscInt       *rowrange=mat->rmap->range;
2393   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2394   Mat            A=aij->A,B=aij->B,C=*matredundant;
2395   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2396   PetscScalar    *sbuf_a;
2397   PetscInt       nzlocal=a->nz+b->nz;
2398   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2399   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2400   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2401   MatScalar      *aworkA,*aworkB;
2402   PetscScalar    *vals;
2403   PetscMPIInt    tag1,tag2,tag3,imdex;
2404   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2405                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2406   MPI_Status     recv_status,*send_status;
2407   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2408   PetscInt       **rbuf_j=PETSC_NULL;
2409   PetscScalar    **rbuf_a=PETSC_NULL;
2410   Mat_Redundant  *redund=PETSC_NULL;
2411   PetscContainer container;
2412 
2413   PetscFunctionBegin;
2414   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2415   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2416 
2417   if (reuse == MAT_REUSE_MATRIX) {
2418     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2419     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2420     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2421     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2422     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2423     if (container) {
2424       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2425     } else {
2426       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2427     }
2428     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2429 
2430     nsends    = redund->nsends;
2431     nrecvs    = redund->nrecvs;
2432     send_rank = redund->send_rank;
2433     recv_rank = redund->recv_rank;
2434     sbuf_nz   = redund->sbuf_nz;
2435     rbuf_nz   = redund->rbuf_nz;
2436     sbuf_j    = redund->sbuf_j;
2437     sbuf_a    = redund->sbuf_a;
2438     rbuf_j    = redund->rbuf_j;
2439     rbuf_a    = redund->rbuf_a;
2440   }
2441 
2442   if (reuse == MAT_INITIAL_MATRIX){
2443     PetscMPIInt  subrank,subsize;
2444     PetscInt     nleftover,np_subcomm;
2445     /* get the destination processors' id send_rank, nsends and nrecvs */
2446     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2447     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2448     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2449     np_subcomm = size/nsubcomm;
2450     nleftover  = size - nsubcomm*np_subcomm;
2451     nsends = 0; nrecvs = 0;
2452     for (i=0; i<size; i++){ /* i=rank*/
2453       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2454         send_rank[nsends] = i; nsends++;
2455         recv_rank[nrecvs++] = i;
2456       }
2457     }
2458     if (rank >= size - nleftover){/* this proc is a leftover processor */
2459       i = size-nleftover-1;
2460       j = 0;
2461       while (j < nsubcomm - nleftover){
2462         send_rank[nsends++] = i;
2463         i--; j++;
2464       }
2465     }
2466 
2467     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2468       for (i=0; i<nleftover; i++){
2469         recv_rank[nrecvs++] = size-nleftover+i;
2470       }
2471     }
2472 
2473     /* allocate sbuf_j, sbuf_a */
2474     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2475     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2476     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2477   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2478 
2479   /* copy mat's local entries into the buffers */
2480   if (reuse == MAT_INITIAL_MATRIX){
2481     rownz_max = 0;
2482     rptr = sbuf_j;
2483     cols = sbuf_j + rend-rstart + 1;
2484     vals = sbuf_a;
2485     rptr[0] = 0;
2486     for (i=0; i<rend-rstart; i++){
2487       row = i + rstart;
2488       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2489       ncols  = nzA + nzB;
2490       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2491       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2492       /* load the column indices for this row into cols */
2493       lwrite = 0;
2494       for (l=0; l<nzB; l++) {
2495         if ((ctmp = bmap[cworkB[l]]) < cstart){
2496           vals[lwrite]   = aworkB[l];
2497           cols[lwrite++] = ctmp;
2498         }
2499       }
2500       for (l=0; l<nzA; l++){
2501         vals[lwrite]   = aworkA[l];
2502         cols[lwrite++] = cstart + cworkA[l];
2503       }
2504       for (l=0; l<nzB; l++) {
2505         if ((ctmp = bmap[cworkB[l]]) >= cend){
2506           vals[lwrite]   = aworkB[l];
2507           cols[lwrite++] = ctmp;
2508         }
2509       }
2510       vals += ncols;
2511       cols += ncols;
2512       rptr[i+1] = rptr[i] + ncols;
2513       if (rownz_max < ncols) rownz_max = ncols;
2514     }
2515     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2516   } else { /* only copy matrix values into sbuf_a */
2517     rptr = sbuf_j;
2518     vals = sbuf_a;
2519     rptr[0] = 0;
2520     for (i=0; i<rend-rstart; i++){
2521       row = i + rstart;
2522       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2523       ncols  = nzA + nzB;
2524       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2525       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2526       lwrite = 0;
2527       for (l=0; l<nzB; l++) {
2528         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2529       }
2530       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2531       for (l=0; l<nzB; l++) {
2532         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2533       }
2534       vals += ncols;
2535       rptr[i+1] = rptr[i] + ncols;
2536     }
2537   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2538 
2539   /* send nzlocal to others, and recv other's nzlocal */
2540   /*--------------------------------------------------*/
2541   if (reuse == MAT_INITIAL_MATRIX){
2542     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2543     s_waits2 = s_waits3 + nsends;
2544     s_waits1 = s_waits2 + nsends;
2545     r_waits1 = s_waits1 + nsends;
2546     r_waits2 = r_waits1 + nrecvs;
2547     r_waits3 = r_waits2 + nrecvs;
2548   } else {
2549     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2550     r_waits3 = s_waits3 + nsends;
2551   }
2552 
2553   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2554   if (reuse == MAT_INITIAL_MATRIX){
2555     /* get new tags to keep the communication clean */
2556     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2557     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2558     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2559 
2560     /* post receives of other's nzlocal */
2561     for (i=0; i<nrecvs; i++){
2562       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2563     }
2564     /* send nzlocal to others */
2565     for (i=0; i<nsends; i++){
2566       sbuf_nz[i] = nzlocal;
2567       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2568     }
2569     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2570     count = nrecvs;
2571     while (count) {
2572       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2573       recv_rank[imdex] = recv_status.MPI_SOURCE;
2574       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2575       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2576 
2577       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2578       rbuf_nz[imdex] += i + 2;
2579       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2580       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2581       count--;
2582     }
2583     /* wait on sends of nzlocal */
2584     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2585     /* send mat->i,j to others, and recv from other's */
2586     /*------------------------------------------------*/
2587     for (i=0; i<nsends; i++){
2588       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2589       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2590     }
2591     /* wait on receives of mat->i,j */
2592     /*------------------------------*/
2593     count = nrecvs;
2594     while (count) {
2595       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2596       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2597       count--;
2598     }
2599     /* wait on sends of mat->i,j */
2600     /*---------------------------*/
2601     if (nsends) {
2602       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2603     }
2604   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2605 
2606   /* post receives, send and receive mat->a */
2607   /*----------------------------------------*/
2608   for (imdex=0; imdex<nrecvs; imdex++) {
2609     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2610   }
2611   for (i=0; i<nsends; i++){
2612     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2613   }
2614   count = nrecvs;
2615   while (count) {
2616     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2617     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2618     count--;
2619   }
2620   if (nsends) {
2621     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2622   }
2623 
2624   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2625 
2626   /* create redundant matrix */
2627   /*-------------------------*/
2628   if (reuse == MAT_INITIAL_MATRIX){
2629     /* compute rownz_max for preallocation */
2630     for (imdex=0; imdex<nrecvs; imdex++){
2631       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2632       rptr = rbuf_j[imdex];
2633       for (i=0; i<j; i++){
2634         ncols = rptr[i+1] - rptr[i];
2635         if (rownz_max < ncols) rownz_max = ncols;
2636       }
2637     }
2638 
2639     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2640     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2641     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2642     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2643     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2644   } else {
2645     C = *matredundant;
2646   }
2647 
2648   /* insert local matrix entries */
2649   rptr = sbuf_j;
2650   cols = sbuf_j + rend-rstart + 1;
2651   vals = sbuf_a;
2652   for (i=0; i<rend-rstart; i++){
2653     row   = i + rstart;
2654     ncols = rptr[i+1] - rptr[i];
2655     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2656     vals += ncols;
2657     cols += ncols;
2658   }
2659   /* insert received matrix entries */
2660   for (imdex=0; imdex<nrecvs; imdex++){
2661     rstart = rowrange[recv_rank[imdex]];
2662     rend   = rowrange[recv_rank[imdex]+1];
2663     rptr = rbuf_j[imdex];
2664     cols = rbuf_j[imdex] + rend-rstart + 1;
2665     vals = rbuf_a[imdex];
2666     for (i=0; i<rend-rstart; i++){
2667       row   = i + rstart;
2668       ncols = rptr[i+1] - rptr[i];
2669       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2670       vals += ncols;
2671       cols += ncols;
2672     }
2673   }
2674   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2675   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2676   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2677   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2678   if (reuse == MAT_INITIAL_MATRIX){
2679     PetscContainer container;
2680     *matredundant = C;
2681     /* create a supporting struct and attach it to C for reuse */
2682     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2683     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2684     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2685     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2686     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2687 
2688     redund->nzlocal = nzlocal;
2689     redund->nsends  = nsends;
2690     redund->nrecvs  = nrecvs;
2691     redund->send_rank = send_rank;
2692     redund->recv_rank = recv_rank;
2693     redund->sbuf_nz = sbuf_nz;
2694     redund->rbuf_nz = rbuf_nz;
2695     redund->sbuf_j  = sbuf_j;
2696     redund->sbuf_a  = sbuf_a;
2697     redund->rbuf_j  = rbuf_j;
2698     redund->rbuf_a  = rbuf_a;
2699 
2700     redund->MatDestroy = C->ops->destroy;
2701     C->ops->destroy    = MatDestroy_MatRedundant;
2702   }
2703   PetscFunctionReturn(0);
2704 }
2705 
2706 #undef __FUNCT__
2707 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2708 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2709 {
2710   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2711   PetscErrorCode ierr;
2712   PetscInt       i,*idxb = 0;
2713   PetscScalar    *va,*vb;
2714   Vec            vtmp;
2715 
2716   PetscFunctionBegin;
2717   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2718   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2719   if (idx) {
2720     for (i=0; i<A->rmap->n; i++) {
2721       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2722     }
2723   }
2724 
2725   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2726   if (idx) {
2727     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2728   }
2729   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2730   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2731 
2732   for (i=0; i<A->rmap->n; i++){
2733     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2734       va[i] = vb[i];
2735       if (idx) idx[i] = a->garray[idxb[i]];
2736     }
2737   }
2738 
2739   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2740   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2741   ierr = PetscFree(idxb);CHKERRQ(ierr);
2742   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2743   PetscFunctionReturn(0);
2744 }
2745 
2746 #undef __FUNCT__
2747 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2748 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2749 {
2750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2751   PetscErrorCode ierr;
2752   PetscInt       i,*idxb = 0;
2753   PetscScalar    *va,*vb;
2754   Vec            vtmp;
2755 
2756   PetscFunctionBegin;
2757   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2758   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2759   if (idx) {
2760     for (i=0; i<A->cmap->n; i++) {
2761       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2762     }
2763   }
2764 
2765   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2766   if (idx) {
2767     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2768   }
2769   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2770   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2771 
2772   for (i=0; i<A->rmap->n; i++){
2773     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2774       va[i] = vb[i];
2775       if (idx) idx[i] = a->garray[idxb[i]];
2776     }
2777   }
2778 
2779   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2780   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2781   ierr = PetscFree(idxb);CHKERRQ(ierr);
2782   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 #undef __FUNCT__
2787 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2788 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2789 {
2790   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2791   PetscInt       n      = A->rmap->n;
2792   PetscInt       cstart = A->cmap->rstart;
2793   PetscInt      *cmap   = mat->garray;
2794   PetscInt      *diagIdx, *offdiagIdx;
2795   Vec            diagV, offdiagV;
2796   PetscScalar   *a, *diagA, *offdiagA;
2797   PetscInt       r;
2798   PetscErrorCode ierr;
2799 
2800   PetscFunctionBegin;
2801   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2802   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2803   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2804   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2805   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2806   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2807   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2808   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2809   for(r = 0; r < n; ++r) {
2810     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2811       a[r]   = diagA[r];
2812       idx[r] = cstart + diagIdx[r];
2813     } else {
2814       a[r]   = offdiagA[r];
2815       idx[r] = cmap[offdiagIdx[r]];
2816     }
2817   }
2818   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2819   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2820   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2821   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2822   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2823   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2824   PetscFunctionReturn(0);
2825 }
2826 
2827 #undef __FUNCT__
2828 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2829 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2830 {
2831   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2832   PetscInt       n      = A->rmap->n;
2833   PetscInt       cstart = A->cmap->rstart;
2834   PetscInt      *cmap   = mat->garray;
2835   PetscInt      *diagIdx, *offdiagIdx;
2836   Vec            diagV, offdiagV;
2837   PetscScalar   *a, *diagA, *offdiagA;
2838   PetscInt       r;
2839   PetscErrorCode ierr;
2840 
2841   PetscFunctionBegin;
2842   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2843   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2844   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2845   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2846   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2847   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2848   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2849   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2850   for(r = 0; r < n; ++r) {
2851     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2852       a[r]   = diagA[r];
2853       idx[r] = cstart + diagIdx[r];
2854     } else {
2855       a[r]   = offdiagA[r];
2856       idx[r] = cmap[offdiagIdx[r]];
2857     }
2858   }
2859   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2860   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2861   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2862   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2863   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2864   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2870 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2871 {
2872   PetscErrorCode ierr;
2873   Mat            *dummy;
2874 
2875   PetscFunctionBegin;
2876   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2877   *newmat = *dummy;
2878   ierr = PetscFree(dummy);CHKERRQ(ierr);
2879   PetscFunctionReturn(0);
2880 }
2881 
2882 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2883 /* -------------------------------------------------------------------*/
2884 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2885        MatGetRow_MPIAIJ,
2886        MatRestoreRow_MPIAIJ,
2887        MatMult_MPIAIJ,
2888 /* 4*/ MatMultAdd_MPIAIJ,
2889        MatMultTranspose_MPIAIJ,
2890        MatMultTransposeAdd_MPIAIJ,
2891 #ifdef PETSC_HAVE_PBGL
2892        MatSolve_MPIAIJ,
2893 #else
2894        0,
2895 #endif
2896        0,
2897        0,
2898 /*10*/ 0,
2899        0,
2900        0,
2901        MatSOR_MPIAIJ,
2902        MatTranspose_MPIAIJ,
2903 /*15*/ MatGetInfo_MPIAIJ,
2904        MatEqual_MPIAIJ,
2905        MatGetDiagonal_MPIAIJ,
2906        MatDiagonalScale_MPIAIJ,
2907        MatNorm_MPIAIJ,
2908 /*20*/ MatAssemblyBegin_MPIAIJ,
2909        MatAssemblyEnd_MPIAIJ,
2910        MatSetOption_MPIAIJ,
2911        MatZeroEntries_MPIAIJ,
2912 /*24*/ MatZeroRows_MPIAIJ,
2913        0,
2914 #ifdef PETSC_HAVE_PBGL
2915        0,
2916 #else
2917        0,
2918 #endif
2919        0,
2920        0,
2921 /*29*/ MatSetUpPreallocation_MPIAIJ,
2922 #ifdef PETSC_HAVE_PBGL
2923        0,
2924 #else
2925        0,
2926 #endif
2927        0,
2928        0,
2929        0,
2930 /*34*/ MatDuplicate_MPIAIJ,
2931        0,
2932        0,
2933        0,
2934        0,
2935 /*39*/ MatAXPY_MPIAIJ,
2936        MatGetSubMatrices_MPIAIJ,
2937        MatIncreaseOverlap_MPIAIJ,
2938        MatGetValues_MPIAIJ,
2939        MatCopy_MPIAIJ,
2940 /*44*/ MatGetRowMax_MPIAIJ,
2941        MatScale_MPIAIJ,
2942        0,
2943        0,
2944        MatZeroRowsColumns_MPIAIJ,
2945 /*49*/ MatSetBlockSize_MPIAIJ,
2946        0,
2947        0,
2948        0,
2949        0,
2950 /*54*/ MatFDColoringCreate_MPIAIJ,
2951        0,
2952        MatSetUnfactored_MPIAIJ,
2953        MatPermute_MPIAIJ,
2954        0,
2955 /*59*/ MatGetSubMatrix_MPIAIJ,
2956        MatDestroy_MPIAIJ,
2957        MatView_MPIAIJ,
2958        0,
2959        0,
2960 /*64*/ 0,
2961        0,
2962        0,
2963        0,
2964        0,
2965 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2966        MatGetRowMinAbs_MPIAIJ,
2967        0,
2968        MatSetColoring_MPIAIJ,
2969 #if defined(PETSC_HAVE_ADIC)
2970        MatSetValuesAdic_MPIAIJ,
2971 #else
2972        0,
2973 #endif
2974        MatSetValuesAdifor_MPIAIJ,
2975 /*75*/ MatFDColoringApply_AIJ,
2976        0,
2977        0,
2978        0,
2979        0,
2980 /*80*/ 0,
2981        0,
2982        0,
2983 /*83*/ MatLoad_MPIAIJ,
2984        0,
2985        0,
2986        0,
2987        0,
2988        0,
2989 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2990        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2991        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2992        MatPtAP_Basic,
2993        MatPtAPSymbolic_MPIAIJ,
2994 /*94*/ MatPtAPNumeric_MPIAIJ,
2995        0,
2996        0,
2997        0,
2998        0,
2999 /*99*/ 0,
3000        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3001        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3002        MatConjugate_MPIAIJ,
3003        0,
3004 /*104*/MatSetValuesRow_MPIAIJ,
3005        MatRealPart_MPIAIJ,
3006        MatImaginaryPart_MPIAIJ,
3007        0,
3008        0,
3009 /*109*/0,
3010        MatGetRedundantMatrix_MPIAIJ,
3011        MatGetRowMin_MPIAIJ,
3012        0,
3013        0,
3014 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
3015        0,
3016        0,
3017        0,
3018        0,
3019 /*119*/0,
3020        0,
3021        0,
3022        0,
3023        MatGetMultiProcBlock_MPIAIJ,
3024 /*124*/MatFindNonZeroRows_MPIAIJ,
3025        0,
3026        0,
3027        0,
3028        MatGetSubMatricesParallel_MPIAIJ
3029 };
3030 
3031 /* ----------------------------------------------------------------------------------------*/
3032 
3033 EXTERN_C_BEGIN
3034 #undef __FUNCT__
3035 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3036 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3037 {
3038   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3039   PetscErrorCode ierr;
3040 
3041   PetscFunctionBegin;
3042   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3043   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3044   PetscFunctionReturn(0);
3045 }
3046 EXTERN_C_END
3047 
3048 EXTERN_C_BEGIN
3049 #undef __FUNCT__
3050 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3051 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3052 {
3053   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3054   PetscErrorCode ierr;
3055 
3056   PetscFunctionBegin;
3057   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3058   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3059   PetscFunctionReturn(0);
3060 }
3061 EXTERN_C_END
3062 
3063 EXTERN_C_BEGIN
3064 #undef __FUNCT__
3065 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3066 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3067 {
3068   Mat_MPIAIJ     *b;
3069   PetscErrorCode ierr;
3070   PetscInt       i;
3071 
3072   PetscFunctionBegin;
3073   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3074   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3075   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3076   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3077 
3078   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3079   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3080   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3081   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3082   if (d_nnz) {
3083     for (i=0; i<B->rmap->n; i++) {
3084       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3085     }
3086   }
3087   if (o_nnz) {
3088     for (i=0; i<B->rmap->n; i++) {
3089       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3090     }
3091   }
3092   b = (Mat_MPIAIJ*)B->data;
3093 
3094   if (!B->preallocated) {
3095     /* Explicitly create 2 MATSEQAIJ matrices. */
3096     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3097     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3098     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3099     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3100     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3101     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3102     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3103     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3104   }
3105 
3106   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3107   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3108   B->preallocated = PETSC_TRUE;
3109   PetscFunctionReturn(0);
3110 }
3111 EXTERN_C_END
3112 
3113 #undef __FUNCT__
3114 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3115 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3116 {
3117   Mat            mat;
3118   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3119   PetscErrorCode ierr;
3120 
3121   PetscFunctionBegin;
3122   *newmat       = 0;
3123   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3124   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3125   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3126   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3127   a    = (Mat_MPIAIJ*)mat->data;
3128 
3129   mat->factortype    = matin->factortype;
3130   mat->rmap->bs      = matin->rmap->bs;
3131   mat->assembled    = PETSC_TRUE;
3132   mat->insertmode   = NOT_SET_VALUES;
3133   mat->preallocated = PETSC_TRUE;
3134 
3135   a->size           = oldmat->size;
3136   a->rank           = oldmat->rank;
3137   a->donotstash     = oldmat->donotstash;
3138   a->roworiented    = oldmat->roworiented;
3139   a->rowindices     = 0;
3140   a->rowvalues      = 0;
3141   a->getrowactive   = PETSC_FALSE;
3142 
3143   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3144   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3145 
3146   if (oldmat->colmap) {
3147 #if defined (PETSC_USE_CTABLE)
3148     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3149 #else
3150     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3151     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3152     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3153 #endif
3154   } else a->colmap = 0;
3155   if (oldmat->garray) {
3156     PetscInt len;
3157     len  = oldmat->B->cmap->n;
3158     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3159     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3160     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3161   } else a->garray = 0;
3162 
3163   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3164   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3165   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3166   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3167   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3168   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3169   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3170   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3171   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3172   *newmat = mat;
3173   PetscFunctionReturn(0);
3174 }
3175 
3176 /*
3177     Allows sending/receiving larger messages then 2 gigabytes in a single call
3178 */
3179 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3180 {
3181   int             ierr;
3182   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3183   PetscInt        i,numchunks;
3184   PetscMPIInt     icnt;
3185 
3186   numchunks = cnt/CHUNKSIZE + 1;
3187   for (i=0; i<numchunks; i++) {
3188     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3189     ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
3190     if (type == MPIU_INT) {
3191       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3192     } else if (type == MPIU_SCALAR) {
3193       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3194     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3195   }
3196   return 0;
3197 }
3198 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3199 {
3200   int             ierr;
3201   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3202   MPI_Status      status;
3203   PetscInt        i,numchunks;
3204   PetscMPIInt     icnt;
3205 
3206   numchunks = cnt/CHUNKSIZE + 1;
3207   for (i=0; i<numchunks; i++) {
3208     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3209     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
3210     if (type == MPIU_INT) {
3211       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3212     } else if (type == MPIU_SCALAR) {
3213       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3214     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3215   }
3216   return 0;
3217 }
3218 
3219 #undef __FUNCT__
3220 #define __FUNCT__ "MatLoad_MPIAIJ"
3221 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3222 {
3223   PetscScalar    *vals,*svals;
3224   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3225   PetscErrorCode ierr;
3226   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3227   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3228   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3229   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3230   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3231   int            fd;
3232 
3233   PetscFunctionBegin;
3234   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3235   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3236   if (!rank) {
3237     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3238     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3239     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3240   }
3241 
3242   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3243 
3244   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3245   M = header[1]; N = header[2];
3246   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3247   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3248   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3249 
3250   /* If global sizes are set, check if they are consistent with that given in the file */
3251   if (sizesset) {
3252     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3253   }
3254   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3255   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3256 
3257   /* determine ownership of all rows */
3258   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3259   else m = newMat->rmap->n; /* Set by user */
3260 
3261   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3262   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3263 
3264   /* First process needs enough room for process with most rows */
3265   if (!rank) {
3266     mmax       = rowners[1];
3267     for (i=2; i<size; i++) {
3268       mmax = PetscMax(mmax,rowners[i]);
3269     }
3270   } else mmax = m;
3271 
3272   rowners[0] = 0;
3273   for (i=2; i<=size; i++) {
3274     rowners[i] += rowners[i-1];
3275   }
3276   rstart = rowners[rank];
3277   rend   = rowners[rank+1];
3278 
3279   /* distribute row lengths to all processors */
3280   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3281   if (!rank) {
3282     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3283     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3284     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3285     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3286     for (j=0; j<m; j++) {
3287       procsnz[0] += ourlens[j];
3288     }
3289     for (i=1; i<size; i++) {
3290       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3291       /* calculate the number of nonzeros on each processor */
3292       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3293         procsnz[i] += rowlengths[j];
3294       }
3295       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3296     }
3297     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3298   } else {
3299     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3300   }
3301 
3302   if (!rank) {
3303     /* determine max buffer needed and allocate it */
3304     maxnz = 0;
3305     for (i=0; i<size; i++) {
3306       maxnz = PetscMax(maxnz,procsnz[i]);
3307     }
3308     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3309 
3310     /* read in my part of the matrix column indices  */
3311     nz   = procsnz[0];
3312     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3313     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3314 
3315     /* read in every one elses and ship off */
3316     for (i=1; i<size; i++) {
3317       nz     = procsnz[i];
3318       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3319       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3320     }
3321     ierr = PetscFree(cols);CHKERRQ(ierr);
3322   } else {
3323     /* determine buffer space needed for message */
3324     nz = 0;
3325     for (i=0; i<m; i++) {
3326       nz += ourlens[i];
3327     }
3328     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3329 
3330     /* receive message of column indices*/
3331     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3332   }
3333 
3334   /* determine column ownership if matrix is not square */
3335   if (N != M) {
3336     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3337     else n = newMat->cmap->n;
3338     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3339     cstart = cend - n;
3340   } else {
3341     cstart = rstart;
3342     cend   = rend;
3343     n      = cend - cstart;
3344   }
3345 
3346   /* loop over local rows, determining number of off diagonal entries */
3347   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3348   jj = 0;
3349   for (i=0; i<m; i++) {
3350     for (j=0; j<ourlens[i]; j++) {
3351       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3352       jj++;
3353     }
3354   }
3355 
3356   for (i=0; i<m; i++) {
3357     ourlens[i] -= offlens[i];
3358   }
3359   if (!sizesset) {
3360     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3361   }
3362   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3363 
3364   for (i=0; i<m; i++) {
3365     ourlens[i] += offlens[i];
3366   }
3367 
3368   if (!rank) {
3369     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3370 
3371     /* read in my part of the matrix numerical values  */
3372     nz   = procsnz[0];
3373     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3374 
3375     /* insert into matrix */
3376     jj      = rstart;
3377     smycols = mycols;
3378     svals   = vals;
3379     for (i=0; i<m; i++) {
3380       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3381       smycols += ourlens[i];
3382       svals   += ourlens[i];
3383       jj++;
3384     }
3385 
3386     /* read in other processors and ship out */
3387     for (i=1; i<size; i++) {
3388       nz     = procsnz[i];
3389       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3390       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3391     }
3392     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3393   } else {
3394     /* receive numeric values */
3395     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3396 
3397     /* receive message of values*/
3398     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3399 
3400     /* insert into matrix */
3401     jj      = rstart;
3402     smycols = mycols;
3403     svals   = vals;
3404     for (i=0; i<m; i++) {
3405       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3406       smycols += ourlens[i];
3407       svals   += ourlens[i];
3408       jj++;
3409     }
3410   }
3411   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3412   ierr = PetscFree(vals);CHKERRQ(ierr);
3413   ierr = PetscFree(mycols);CHKERRQ(ierr);
3414   ierr = PetscFree(rowners);CHKERRQ(ierr);
3415 
3416   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3417   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3418   PetscFunctionReturn(0);
3419 }
3420 
3421 #undef __FUNCT__
3422 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3423 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3424 {
3425   PetscErrorCode ierr;
3426   IS             iscol_local;
3427   PetscInt       csize;
3428 
3429   PetscFunctionBegin;
3430   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3431   if (call == MAT_REUSE_MATRIX) {
3432     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3433     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3434   } else {
3435     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3436   }
3437   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3438   if (call == MAT_INITIAL_MATRIX) {
3439     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3440     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3441   }
3442   PetscFunctionReturn(0);
3443 }
3444 
3445 #undef __FUNCT__
3446 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3447 /*
3448     Not great since it makes two copies of the submatrix, first an SeqAIJ
3449   in local and then by concatenating the local matrices the end result.
3450   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3451 
3452   Note: This requires a sequential iscol with all indices.
3453 */
3454 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3455 {
3456   PetscErrorCode ierr;
3457   PetscMPIInt    rank,size;
3458   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3459   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3460   Mat            *local,M,Mreuse;
3461   MatScalar      *vwork,*aa;
3462   MPI_Comm       comm = ((PetscObject)mat)->comm;
3463   Mat_SeqAIJ     *aij;
3464 
3465 
3466   PetscFunctionBegin;
3467   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3468   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3469 
3470   if (call ==  MAT_REUSE_MATRIX) {
3471     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3472     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3473     local = &Mreuse;
3474     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3475   } else {
3476     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3477     Mreuse = *local;
3478     ierr   = PetscFree(local);CHKERRQ(ierr);
3479   }
3480 
3481   /*
3482       m - number of local rows
3483       n - number of columns (same on all processors)
3484       rstart - first row in new global matrix generated
3485   */
3486   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3487   if (call == MAT_INITIAL_MATRIX) {
3488     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3489     ii  = aij->i;
3490     jj  = aij->j;
3491 
3492     /*
3493         Determine the number of non-zeros in the diagonal and off-diagonal
3494         portions of the matrix in order to do correct preallocation
3495     */
3496 
3497     /* first get start and end of "diagonal" columns */
3498     if (csize == PETSC_DECIDE) {
3499       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3500       if (mglobal == n) { /* square matrix */
3501 	nlocal = m;
3502       } else {
3503         nlocal = n/size + ((n % size) > rank);
3504       }
3505     } else {
3506       nlocal = csize;
3507     }
3508     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3509     rstart = rend - nlocal;
3510     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3511 
3512     /* next, compute all the lengths */
3513     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3514     olens = dlens + m;
3515     for (i=0; i<m; i++) {
3516       jend = ii[i+1] - ii[i];
3517       olen = 0;
3518       dlen = 0;
3519       for (j=0; j<jend; j++) {
3520         if (*jj < rstart || *jj >= rend) olen++;
3521         else dlen++;
3522         jj++;
3523       }
3524       olens[i] = olen;
3525       dlens[i] = dlen;
3526     }
3527     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3528     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3529     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3530     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3531     ierr = PetscFree(dlens);CHKERRQ(ierr);
3532   } else {
3533     PetscInt ml,nl;
3534 
3535     M = *newmat;
3536     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3537     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3538     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3539     /*
3540          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3541        rather than the slower MatSetValues().
3542     */
3543     M->was_assembled = PETSC_TRUE;
3544     M->assembled     = PETSC_FALSE;
3545   }
3546   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3547   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3548   ii  = aij->i;
3549   jj  = aij->j;
3550   aa  = aij->a;
3551   for (i=0; i<m; i++) {
3552     row   = rstart + i;
3553     nz    = ii[i+1] - ii[i];
3554     cwork = jj;     jj += nz;
3555     vwork = aa;     aa += nz;
3556     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3557   }
3558 
3559   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3560   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3561   *newmat = M;
3562 
3563   /* save submatrix used in processor for next request */
3564   if (call ==  MAT_INITIAL_MATRIX) {
3565     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3566     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3567   }
3568 
3569   PetscFunctionReturn(0);
3570 }
3571 
3572 EXTERN_C_BEGIN
3573 #undef __FUNCT__
3574 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3575 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3576 {
3577   PetscInt       m,cstart, cend,j,nnz,i,d;
3578   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3579   const PetscInt *JJ;
3580   PetscScalar    *values;
3581   PetscErrorCode ierr;
3582 
3583   PetscFunctionBegin;
3584   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3585 
3586   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3587   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3588   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3589   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3590   m      = B->rmap->n;
3591   cstart = B->cmap->rstart;
3592   cend   = B->cmap->rend;
3593   rstart = B->rmap->rstart;
3594 
3595   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3596 
3597 #if defined(PETSC_USE_DEBUGGING)
3598   for (i=0; i<m; i++) {
3599     nnz     = Ii[i+1]- Ii[i];
3600     JJ      = J + Ii[i];
3601     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3602     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3603     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3604   }
3605 #endif
3606 
3607   for (i=0; i<m; i++) {
3608     nnz     = Ii[i+1]- Ii[i];
3609     JJ      = J + Ii[i];
3610     nnz_max = PetscMax(nnz_max,nnz);
3611     d       = 0;
3612     for (j=0; j<nnz; j++) {
3613       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3614     }
3615     d_nnz[i] = d;
3616     o_nnz[i] = nnz - d;
3617   }
3618   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3619   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3620 
3621   if (v) values = (PetscScalar*)v;
3622   else {
3623     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3624     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3625   }
3626 
3627   for (i=0; i<m; i++) {
3628     ii   = i + rstart;
3629     nnz  = Ii[i+1]- Ii[i];
3630     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3631   }
3632   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3633   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3634 
3635   if (!v) {
3636     ierr = PetscFree(values);CHKERRQ(ierr);
3637   }
3638   PetscFunctionReturn(0);
3639 }
3640 EXTERN_C_END
3641 
3642 #undef __FUNCT__
3643 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3644 /*@
3645    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3646    (the default parallel PETSc format).
3647 
3648    Collective on MPI_Comm
3649 
3650    Input Parameters:
3651 +  B - the matrix
3652 .  i - the indices into j for the start of each local row (starts with zero)
3653 .  j - the column indices for each local row (starts with zero)
3654 -  v - optional values in the matrix
3655 
3656    Level: developer
3657 
3658    Notes:
3659        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3660      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3661      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3662 
3663        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3664 
3665        The format which is used for the sparse matrix input, is equivalent to a
3666     row-major ordering.. i.e for the following matrix, the input data expected is
3667     as shown:
3668 
3669         1 0 0
3670         2 0 3     P0
3671        -------
3672         4 5 6     P1
3673 
3674      Process0 [P0]: rows_owned=[0,1]
3675         i =  {0,1,3}  [size = nrow+1  = 2+1]
3676         j =  {0,0,2}  [size = nz = 6]
3677         v =  {1,2,3}  [size = nz = 6]
3678 
3679      Process1 [P1]: rows_owned=[2]
3680         i =  {0,3}    [size = nrow+1  = 1+1]
3681         j =  {0,1,2}  [size = nz = 6]
3682         v =  {4,5,6}  [size = nz = 6]
3683 
3684 .keywords: matrix, aij, compressed row, sparse, parallel
3685 
3686 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3687           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3688 @*/
3689 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3690 {
3691   PetscErrorCode ierr;
3692 
3693   PetscFunctionBegin;
3694   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3695   PetscFunctionReturn(0);
3696 }
3697 
3698 #undef __FUNCT__
3699 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3700 /*@C
3701    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3702    (the default parallel PETSc format).  For good matrix assembly performance
3703    the user should preallocate the matrix storage by setting the parameters
3704    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3705    performance can be increased by more than a factor of 50.
3706 
3707    Collective on MPI_Comm
3708 
3709    Input Parameters:
3710 +  A - the matrix
3711 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3712            (same value is used for all local rows)
3713 .  d_nnz - array containing the number of nonzeros in the various rows of the
3714            DIAGONAL portion of the local submatrix (possibly different for each row)
3715            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3716            The size of this array is equal to the number of local rows, i.e 'm'.
3717            You must leave room for the diagonal entry even if it is zero.
3718 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3719            submatrix (same value is used for all local rows).
3720 -  o_nnz - array containing the number of nonzeros in the various rows of the
3721            OFF-DIAGONAL portion of the local submatrix (possibly different for
3722            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3723            structure. The size of this array is equal to the number
3724            of local rows, i.e 'm'.
3725 
3726    If the *_nnz parameter is given then the *_nz parameter is ignored
3727 
3728    The AIJ format (also called the Yale sparse matrix format or
3729    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3730    storage.  The stored row and column indices begin with zero.
3731    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3732 
3733    The parallel matrix is partitioned such that the first m0 rows belong to
3734    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3735    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3736 
3737    The DIAGONAL portion of the local submatrix of a processor can be defined
3738    as the submatrix which is obtained by extraction the part corresponding to
3739    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3740    first row that belongs to the processor, r2 is the last row belonging to
3741    the this processor, and c1-c2 is range of indices of the local part of a
3742    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3743    common case of a square matrix, the row and column ranges are the same and
3744    the DIAGONAL part is also square. The remaining portion of the local
3745    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3746 
3747    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3748 
3749    You can call MatGetInfo() to get information on how effective the preallocation was;
3750    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3751    You can also run with the option -info and look for messages with the string
3752    malloc in them to see if additional memory allocation was needed.
3753 
3754    Example usage:
3755 
3756    Consider the following 8x8 matrix with 34 non-zero values, that is
3757    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3758    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3759    as follows:
3760 
3761 .vb
3762             1  2  0  |  0  3  0  |  0  4
3763     Proc0   0  5  6  |  7  0  0  |  8  0
3764             9  0 10  | 11  0  0  | 12  0
3765     -------------------------------------
3766            13  0 14  | 15 16 17  |  0  0
3767     Proc1   0 18  0  | 19 20 21  |  0  0
3768             0  0  0  | 22 23  0  | 24  0
3769     -------------------------------------
3770     Proc2  25 26 27  |  0  0 28  | 29  0
3771            30  0  0  | 31 32 33  |  0 34
3772 .ve
3773 
3774    This can be represented as a collection of submatrices as:
3775 
3776 .vb
3777       A B C
3778       D E F
3779       G H I
3780 .ve
3781 
3782    Where the submatrices A,B,C are owned by proc0, D,E,F are
3783    owned by proc1, G,H,I are owned by proc2.
3784 
3785    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3786    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3787    The 'M','N' parameters are 8,8, and have the same values on all procs.
3788 
3789    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3790    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3791    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3792    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3793    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3794    matrix, ans [DF] as another SeqAIJ matrix.
3795 
3796    When d_nz, o_nz parameters are specified, d_nz storage elements are
3797    allocated for every row of the local diagonal submatrix, and o_nz
3798    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3799    One way to choose d_nz and o_nz is to use the max nonzerors per local
3800    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3801    In this case, the values of d_nz,o_nz are:
3802 .vb
3803      proc0 : dnz = 2, o_nz = 2
3804      proc1 : dnz = 3, o_nz = 2
3805      proc2 : dnz = 1, o_nz = 4
3806 .ve
3807    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3808    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3809    for proc3. i.e we are using 12+15+10=37 storage locations to store
3810    34 values.
3811 
3812    When d_nnz, o_nnz parameters are specified, the storage is specified
3813    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3814    In the above case the values for d_nnz,o_nnz are:
3815 .vb
3816      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3817      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3818      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3819 .ve
3820    Here the space allocated is sum of all the above values i.e 34, and
3821    hence pre-allocation is perfect.
3822 
3823    Level: intermediate
3824 
3825 .keywords: matrix, aij, compressed row, sparse, parallel
3826 
3827 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3828           MPIAIJ, MatGetInfo()
3829 @*/
3830 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3831 {
3832   PetscErrorCode ierr;
3833 
3834   PetscFunctionBegin;
3835   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3841 /*@
3842      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3843          CSR format the local rows.
3844 
3845    Collective on MPI_Comm
3846 
3847    Input Parameters:
3848 +  comm - MPI communicator
3849 .  m - number of local rows (Cannot be PETSC_DECIDE)
3850 .  n - This value should be the same as the local size used in creating the
3851        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3852        calculated if N is given) For square matrices n is almost always m.
3853 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3854 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3855 .   i - row indices
3856 .   j - column indices
3857 -   a - matrix values
3858 
3859    Output Parameter:
3860 .   mat - the matrix
3861 
3862    Level: intermediate
3863 
3864    Notes:
3865        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3866      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3867      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3868 
3869        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3870 
3871        The format which is used for the sparse matrix input, is equivalent to a
3872     row-major ordering.. i.e for the following matrix, the input data expected is
3873     as shown:
3874 
3875         1 0 0
3876         2 0 3     P0
3877        -------
3878         4 5 6     P1
3879 
3880      Process0 [P0]: rows_owned=[0,1]
3881         i =  {0,1,3}  [size = nrow+1  = 2+1]
3882         j =  {0,0,2}  [size = nz = 6]
3883         v =  {1,2,3}  [size = nz = 6]
3884 
3885      Process1 [P1]: rows_owned=[2]
3886         i =  {0,3}    [size = nrow+1  = 1+1]
3887         j =  {0,1,2}  [size = nz = 6]
3888         v =  {4,5,6}  [size = nz = 6]
3889 
3890 .keywords: matrix, aij, compressed row, sparse, parallel
3891 
3892 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3893           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3894 @*/
3895 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3896 {
3897   PetscErrorCode ierr;
3898 
3899  PetscFunctionBegin;
3900   if (i[0]) {
3901     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3902   }
3903   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3904   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3905   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3906   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3907   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3908   PetscFunctionReturn(0);
3909 }
3910 
3911 #undef __FUNCT__
3912 #define __FUNCT__ "MatCreateMPIAIJ"
3913 /*@C
3914    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3915    (the default parallel PETSc format).  For good matrix assembly performance
3916    the user should preallocate the matrix storage by setting the parameters
3917    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3918    performance can be increased by more than a factor of 50.
3919 
3920    Collective on MPI_Comm
3921 
3922    Input Parameters:
3923 +  comm - MPI communicator
3924 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3925            This value should be the same as the local size used in creating the
3926            y vector for the matrix-vector product y = Ax.
3927 .  n - This value should be the same as the local size used in creating the
3928        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3929        calculated if N is given) For square matrices n is almost always m.
3930 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3931 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3932 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3933            (same value is used for all local rows)
3934 .  d_nnz - array containing the number of nonzeros in the various rows of the
3935            DIAGONAL portion of the local submatrix (possibly different for each row)
3936            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3937            The size of this array is equal to the number of local rows, i.e 'm'.
3938            You must leave room for the diagonal entry even if it is zero.
3939 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3940            submatrix (same value is used for all local rows).
3941 -  o_nnz - array containing the number of nonzeros in the various rows of the
3942            OFF-DIAGONAL portion of the local submatrix (possibly different for
3943            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3944            structure. The size of this array is equal to the number
3945            of local rows, i.e 'm'.
3946 
3947    Output Parameter:
3948 .  A - the matrix
3949 
3950    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3951    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3952    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3953 
3954    Notes:
3955    If the *_nnz parameter is given then the *_nz parameter is ignored
3956 
3957    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3958    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3959    storage requirements for this matrix.
3960 
3961    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3962    processor than it must be used on all processors that share the object for
3963    that argument.
3964 
3965    The user MUST specify either the local or global matrix dimensions
3966    (possibly both).
3967 
3968    The parallel matrix is partitioned across processors such that the
3969    first m0 rows belong to process 0, the next m1 rows belong to
3970    process 1, the next m2 rows belong to process 2 etc.. where
3971    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3972    values corresponding to [m x N] submatrix.
3973 
3974    The columns are logically partitioned with the n0 columns belonging
3975    to 0th partition, the next n1 columns belonging to the next
3976    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3977 
3978    The DIAGONAL portion of the local submatrix on any given processor
3979    is the submatrix corresponding to the rows and columns m,n
3980    corresponding to the given processor. i.e diagonal matrix on
3981    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3982    etc. The remaining portion of the local submatrix [m x (N-n)]
3983    constitute the OFF-DIAGONAL portion. The example below better
3984    illustrates this concept.
3985 
3986    For a square global matrix we define each processor's diagonal portion
3987    to be its local rows and the corresponding columns (a square submatrix);
3988    each processor's off-diagonal portion encompasses the remainder of the
3989    local matrix (a rectangular submatrix).
3990 
3991    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3992 
3993    When calling this routine with a single process communicator, a matrix of
3994    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3995    type of communicator, use the construction mechanism:
3996      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3997 
3998    By default, this format uses inodes (identical nodes) when possible.
3999    We search for consecutive rows with the same nonzero structure, thereby
4000    reusing matrix information to achieve increased efficiency.
4001 
4002    Options Database Keys:
4003 +  -mat_no_inode  - Do not use inodes
4004 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4005 -  -mat_aij_oneindex - Internally use indexing starting at 1
4006         rather than 0.  Note that when calling MatSetValues(),
4007         the user still MUST index entries starting at 0!
4008 
4009 
4010    Example usage:
4011 
4012    Consider the following 8x8 matrix with 34 non-zero values, that is
4013    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4014    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4015    as follows:
4016 
4017 .vb
4018             1  2  0  |  0  3  0  |  0  4
4019     Proc0   0  5  6  |  7  0  0  |  8  0
4020             9  0 10  | 11  0  0  | 12  0
4021     -------------------------------------
4022            13  0 14  | 15 16 17  |  0  0
4023     Proc1   0 18  0  | 19 20 21  |  0  0
4024             0  0  0  | 22 23  0  | 24  0
4025     -------------------------------------
4026     Proc2  25 26 27  |  0  0 28  | 29  0
4027            30  0  0  | 31 32 33  |  0 34
4028 .ve
4029 
4030    This can be represented as a collection of submatrices as:
4031 
4032 .vb
4033       A B C
4034       D E F
4035       G H I
4036 .ve
4037 
4038    Where the submatrices A,B,C are owned by proc0, D,E,F are
4039    owned by proc1, G,H,I are owned by proc2.
4040 
4041    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4042    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4043    The 'M','N' parameters are 8,8, and have the same values on all procs.
4044 
4045    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4046    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4047    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4048    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4049    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4050    matrix, ans [DF] as another SeqAIJ matrix.
4051 
4052    When d_nz, o_nz parameters are specified, d_nz storage elements are
4053    allocated for every row of the local diagonal submatrix, and o_nz
4054    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4055    One way to choose d_nz and o_nz is to use the max nonzerors per local
4056    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4057    In this case, the values of d_nz,o_nz are:
4058 .vb
4059      proc0 : dnz = 2, o_nz = 2
4060      proc1 : dnz = 3, o_nz = 2
4061      proc2 : dnz = 1, o_nz = 4
4062 .ve
4063    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4064    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4065    for proc3. i.e we are using 12+15+10=37 storage locations to store
4066    34 values.
4067 
4068    When d_nnz, o_nnz parameters are specified, the storage is specified
4069    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4070    In the above case the values for d_nnz,o_nnz are:
4071 .vb
4072      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4073      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4074      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4075 .ve
4076    Here the space allocated is sum of all the above values i.e 34, and
4077    hence pre-allocation is perfect.
4078 
4079    Level: intermediate
4080 
4081 .keywords: matrix, aij, compressed row, sparse, parallel
4082 
4083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4084           MPIAIJ, MatCreateMPIAIJWithArrays()
4085 @*/
4086 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4087 {
4088   PetscErrorCode ierr;
4089   PetscMPIInt    size;
4090 
4091   PetscFunctionBegin;
4092   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4093   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4094   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4095   if (size > 1) {
4096     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4097     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4098   } else {
4099     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4100     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4101   }
4102   PetscFunctionReturn(0);
4103 }
4104 
4105 #undef __FUNCT__
4106 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4107 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4108 {
4109   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4110 
4111   PetscFunctionBegin;
4112   *Ad     = a->A;
4113   *Ao     = a->B;
4114   *colmap = a->garray;
4115   PetscFunctionReturn(0);
4116 }
4117 
4118 #undef __FUNCT__
4119 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4120 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4121 {
4122   PetscErrorCode ierr;
4123   PetscInt       i;
4124   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4125 
4126   PetscFunctionBegin;
4127   if (coloring->ctype == IS_COLORING_GLOBAL) {
4128     ISColoringValue *allcolors,*colors;
4129     ISColoring      ocoloring;
4130 
4131     /* set coloring for diagonal portion */
4132     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4133 
4134     /* set coloring for off-diagonal portion */
4135     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4136     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4137     for (i=0; i<a->B->cmap->n; i++) {
4138       colors[i] = allcolors[a->garray[i]];
4139     }
4140     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4141     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4142     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4143     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4144   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4145     ISColoringValue *colors;
4146     PetscInt        *larray;
4147     ISColoring      ocoloring;
4148 
4149     /* set coloring for diagonal portion */
4150     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4151     for (i=0; i<a->A->cmap->n; i++) {
4152       larray[i] = i + A->cmap->rstart;
4153     }
4154     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4155     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4156     for (i=0; i<a->A->cmap->n; i++) {
4157       colors[i] = coloring->colors[larray[i]];
4158     }
4159     ierr = PetscFree(larray);CHKERRQ(ierr);
4160     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4161     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4162     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4163 
4164     /* set coloring for off-diagonal portion */
4165     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4166     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4167     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4168     for (i=0; i<a->B->cmap->n; i++) {
4169       colors[i] = coloring->colors[larray[i]];
4170     }
4171     ierr = PetscFree(larray);CHKERRQ(ierr);
4172     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4173     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4174     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4175   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4176 
4177   PetscFunctionReturn(0);
4178 }
4179 
4180 #if defined(PETSC_HAVE_ADIC)
4181 #undef __FUNCT__
4182 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4183 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4184 {
4185   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4186   PetscErrorCode ierr;
4187 
4188   PetscFunctionBegin;
4189   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4190   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4191   PetscFunctionReturn(0);
4192 }
4193 #endif
4194 
4195 #undef __FUNCT__
4196 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4197 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4198 {
4199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4200   PetscErrorCode ierr;
4201 
4202   PetscFunctionBegin;
4203   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4204   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4205   PetscFunctionReturn(0);
4206 }
4207 
4208 #undef __FUNCT__
4209 #define __FUNCT__ "MatMerge"
4210 /*@
4211       MatMerge - Creates a single large PETSc matrix by concatinating sequential
4212                  matrices from each processor
4213 
4214     Collective on MPI_Comm
4215 
4216    Input Parameters:
4217 +    comm - the communicators the parallel matrix will live on
4218 .    inmat - the input sequential matrices
4219 .    n - number of local columns (or PETSC_DECIDE)
4220 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4221 
4222    Output Parameter:
4223 .    outmat - the parallel matrix generated
4224 
4225     Level: advanced
4226 
4227    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4228 
4229 @*/
4230 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4231 {
4232   PetscErrorCode ierr;
4233   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4234   PetscInt       *indx;
4235   PetscScalar    *values;
4236 
4237   PetscFunctionBegin;
4238   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4239   if (scall == MAT_INITIAL_MATRIX){
4240     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4241     if (n == PETSC_DECIDE){
4242       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4243     }
4244     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4245     rstart -= m;
4246 
4247     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4248     for (i=0;i<m;i++) {
4249       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4250       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4251       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4252     }
4253     /* This routine will ONLY return MPIAIJ type matrix */
4254     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4255     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4256     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4257     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4258     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4259 
4260   } else if (scall == MAT_REUSE_MATRIX){
4261     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4262   } else {
4263     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4264   }
4265 
4266   for (i=0;i<m;i++) {
4267     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4268     Ii    = i + rstart;
4269     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4270     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4271   }
4272   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4273   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4274   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4275 
4276   PetscFunctionReturn(0);
4277 }
4278 
4279 #undef __FUNCT__
4280 #define __FUNCT__ "MatFileSplit"
4281 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4282 {
4283   PetscErrorCode    ierr;
4284   PetscMPIInt       rank;
4285   PetscInt          m,N,i,rstart,nnz;
4286   size_t            len;
4287   const PetscInt    *indx;
4288   PetscViewer       out;
4289   char              *name;
4290   Mat               B;
4291   const PetscScalar *values;
4292 
4293   PetscFunctionBegin;
4294   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4295   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4296   /* Should this be the type of the diagonal block of A? */
4297   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4298   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4299   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4300   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4301   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4302   for (i=0;i<m;i++) {
4303     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4304     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4305     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4306   }
4307   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4308   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4309 
4310   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4311   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4312   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4313   sprintf(name,"%s.%d",outfile,rank);
4314   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4315   ierr = PetscFree(name);
4316   ierr = MatView(B,out);CHKERRQ(ierr);
4317   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4318   ierr = MatDestroy(&B);CHKERRQ(ierr);
4319   PetscFunctionReturn(0);
4320 }
4321 
4322 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4323 #undef __FUNCT__
4324 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4325 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4326 {
4327   PetscErrorCode       ierr;
4328   Mat_Merge_SeqsToMPI  *merge;
4329   PetscContainer       container;
4330 
4331   PetscFunctionBegin;
4332   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4333   if (container) {
4334     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4335     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4336     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4337     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4338     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4339     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4340     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4341     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4342     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4343     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4344     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4345     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4346     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4347     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4348 
4349     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4350     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4351   }
4352   ierr = PetscFree(merge);CHKERRQ(ierr);
4353 
4354   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4355   PetscFunctionReturn(0);
4356 }
4357 
4358 #include <../src/mat/utils/freespace.h>
4359 #include <petscbt.h>
4360 
4361 #undef __FUNCT__
4362 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4363 /*@C
4364       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4365                  matrices from each processor
4366 
4367     Collective on MPI_Comm
4368 
4369    Input Parameters:
4370 +    comm - the communicators the parallel matrix will live on
4371 .    seqmat - the input sequential matrices
4372 .    m - number of local rows (or PETSC_DECIDE)
4373 .    n - number of local columns (or PETSC_DECIDE)
4374 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4375 
4376    Output Parameter:
4377 .    mpimat - the parallel matrix generated
4378 
4379     Level: advanced
4380 
4381    Notes:
4382      The dimensions of the sequential matrix in each processor MUST be the same.
4383      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4384      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4385 @*/
4386 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4387 {
4388   PetscErrorCode       ierr;
4389   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4390   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4391   PetscMPIInt          size,rank,taga,*len_s;
4392   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4393   PetscInt             proc,m;
4394   PetscInt             **buf_ri,**buf_rj;
4395   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4396   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4397   MPI_Request          *s_waits,*r_waits;
4398   MPI_Status           *status;
4399   MatScalar            *aa=a->a;
4400   MatScalar            **abuf_r,*ba_i;
4401   Mat_Merge_SeqsToMPI  *merge;
4402   PetscContainer       container;
4403 
4404   PetscFunctionBegin;
4405   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4406 
4407   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4408   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4409 
4410   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4411   if (container) {
4412     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4413   }
4414   bi     = merge->bi;
4415   bj     = merge->bj;
4416   buf_ri = merge->buf_ri;
4417   buf_rj = merge->buf_rj;
4418 
4419   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4420   owners = merge->rowmap->range;
4421   len_s  = merge->len_s;
4422 
4423   /* send and recv matrix values */
4424   /*-----------------------------*/
4425   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4426   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4427 
4428   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4429   for (proc=0,k=0; proc<size; proc++){
4430     if (!len_s[proc]) continue;
4431     i = owners[proc];
4432     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4433     k++;
4434   }
4435 
4436   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4437   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4438   ierr = PetscFree(status);CHKERRQ(ierr);
4439 
4440   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4441   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4442 
4443   /* insert mat values of mpimat */
4444   /*----------------------------*/
4445   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4446   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4447 
4448   for (k=0; k<merge->nrecv; k++){
4449     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4450     nrows = *(buf_ri_k[k]);
4451     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4452     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4453   }
4454 
4455   /* set values of ba */
4456   m = merge->rowmap->n;
4457   for (i=0; i<m; i++) {
4458     arow = owners[rank] + i;
4459     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4460     bnzi = bi[i+1] - bi[i];
4461     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4462 
4463     /* add local non-zero vals of this proc's seqmat into ba */
4464     anzi = ai[arow+1] - ai[arow];
4465     aj   = a->j + ai[arow];
4466     aa   = a->a + ai[arow];
4467     nextaj = 0;
4468     for (j=0; nextaj<anzi; j++){
4469       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4470         ba_i[j] += aa[nextaj++];
4471       }
4472     }
4473 
4474     /* add received vals into ba */
4475     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4476       /* i-th row */
4477       if (i == *nextrow[k]) {
4478         anzi = *(nextai[k]+1) - *nextai[k];
4479         aj   = buf_rj[k] + *(nextai[k]);
4480         aa   = abuf_r[k] + *(nextai[k]);
4481         nextaj = 0;
4482         for (j=0; nextaj<anzi; j++){
4483           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4484             ba_i[j] += aa[nextaj++];
4485           }
4486         }
4487         nextrow[k]++; nextai[k]++;
4488       }
4489     }
4490     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4491   }
4492   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4493   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4494 
4495   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4496   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4497   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4498   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4499   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4500   PetscFunctionReturn(0);
4501 }
4502 
4503 #undef __FUNCT__
4504 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4505 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4506 {
4507   PetscErrorCode       ierr;
4508   Mat                  B_mpi;
4509   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4510   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4511   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4512   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4513   PetscInt             len,proc,*dnz,*onz;
4514   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4515   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4516   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4517   MPI_Status           *status;
4518   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4519   PetscBT              lnkbt;
4520   Mat_Merge_SeqsToMPI  *merge;
4521   PetscContainer       container;
4522 
4523   PetscFunctionBegin;
4524   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4525 
4526   /* make sure it is a PETSc comm */
4527   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4528   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4529   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4530 
4531   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4532   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4533 
4534   /* determine row ownership */
4535   /*---------------------------------------------------------*/
4536   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4537   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4538   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4539   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4540   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4541   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4542   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4543 
4544   m      = merge->rowmap->n;
4545   M      = merge->rowmap->N;
4546   owners = merge->rowmap->range;
4547 
4548   /* determine the number of messages to send, their lengths */
4549   /*---------------------------------------------------------*/
4550   len_s  = merge->len_s;
4551 
4552   len = 0;  /* length of buf_si[] */
4553   merge->nsend = 0;
4554   for (proc=0; proc<size; proc++){
4555     len_si[proc] = 0;
4556     if (proc == rank){
4557       len_s[proc] = 0;
4558     } else {
4559       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4560       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4561     }
4562     if (len_s[proc]) {
4563       merge->nsend++;
4564       nrows = 0;
4565       for (i=owners[proc]; i<owners[proc+1]; i++){
4566         if (ai[i+1] > ai[i]) nrows++;
4567       }
4568       len_si[proc] = 2*(nrows+1);
4569       len += len_si[proc];
4570     }
4571   }
4572 
4573   /* determine the number and length of messages to receive for ij-structure */
4574   /*-------------------------------------------------------------------------*/
4575   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4576   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4577 
4578   /* post the Irecv of j-structure */
4579   /*-------------------------------*/
4580   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4581   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4582 
4583   /* post the Isend of j-structure */
4584   /*--------------------------------*/
4585   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4586 
4587   for (proc=0, k=0; proc<size; proc++){
4588     if (!len_s[proc]) continue;
4589     i = owners[proc];
4590     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4591     k++;
4592   }
4593 
4594   /* receives and sends of j-structure are complete */
4595   /*------------------------------------------------*/
4596   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4597   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4598 
4599   /* send and recv i-structure */
4600   /*---------------------------*/
4601   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4602   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4603 
4604   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4605   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4606   for (proc=0,k=0; proc<size; proc++){
4607     if (!len_s[proc]) continue;
4608     /* form outgoing message for i-structure:
4609          buf_si[0]:                 nrows to be sent
4610                [1:nrows]:           row index (global)
4611                [nrows+1:2*nrows+1]: i-structure index
4612     */
4613     /*-------------------------------------------*/
4614     nrows = len_si[proc]/2 - 1;
4615     buf_si_i    = buf_si + nrows+1;
4616     buf_si[0]   = nrows;
4617     buf_si_i[0] = 0;
4618     nrows = 0;
4619     for (i=owners[proc]; i<owners[proc+1]; i++){
4620       anzi = ai[i+1] - ai[i];
4621       if (anzi) {
4622         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4623         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4624         nrows++;
4625       }
4626     }
4627     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4628     k++;
4629     buf_si += len_si[proc];
4630   }
4631 
4632   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4633   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4634 
4635   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4636   for (i=0; i<merge->nrecv; i++){
4637     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4638   }
4639 
4640   ierr = PetscFree(len_si);CHKERRQ(ierr);
4641   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4642   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4643   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4644   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4645   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4646   ierr = PetscFree(status);CHKERRQ(ierr);
4647 
4648   /* compute a local seq matrix in each processor */
4649   /*----------------------------------------------*/
4650   /* allocate bi array and free space for accumulating nonzero column info */
4651   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4652   bi[0] = 0;
4653 
4654   /* create and initialize a linked list */
4655   nlnk = N+1;
4656   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4657 
4658   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4659   len = 0;
4660   len  = ai[owners[rank+1]] - ai[owners[rank]];
4661   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4662   current_space = free_space;
4663 
4664   /* determine symbolic info for each local row */
4665   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4666 
4667   for (k=0; k<merge->nrecv; k++){
4668     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4669     nrows = *buf_ri_k[k];
4670     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4671     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4672   }
4673 
4674   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4675   len = 0;
4676   for (i=0;i<m;i++) {
4677     bnzi   = 0;
4678     /* add local non-zero cols of this proc's seqmat into lnk */
4679     arow   = owners[rank] + i;
4680     anzi   = ai[arow+1] - ai[arow];
4681     aj     = a->j + ai[arow];
4682     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4683     bnzi += nlnk;
4684     /* add received col data into lnk */
4685     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4686       if (i == *nextrow[k]) { /* i-th row */
4687         anzi = *(nextai[k]+1) - *nextai[k];
4688         aj   = buf_rj[k] + *nextai[k];
4689         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4690         bnzi += nlnk;
4691         nextrow[k]++; nextai[k]++;
4692       }
4693     }
4694     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4695 
4696     /* if free space is not available, make more free space */
4697     if (current_space->local_remaining<bnzi) {
4698       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4699       nspacedouble++;
4700     }
4701     /* copy data into free space, then initialize lnk */
4702     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4703     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4704 
4705     current_space->array           += bnzi;
4706     current_space->local_used      += bnzi;
4707     current_space->local_remaining -= bnzi;
4708 
4709     bi[i+1] = bi[i] + bnzi;
4710   }
4711 
4712   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4713 
4714   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4715   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4716   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4717 
4718   /* create symbolic parallel matrix B_mpi */
4719   /*---------------------------------------*/
4720   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4721   if (n==PETSC_DECIDE) {
4722     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4723   } else {
4724     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4725   }
4726   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4727   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4728   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4729 
4730   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4731   B_mpi->assembled     = PETSC_FALSE;
4732   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4733   merge->bi            = bi;
4734   merge->bj            = bj;
4735   merge->buf_ri        = buf_ri;
4736   merge->buf_rj        = buf_rj;
4737   merge->coi           = PETSC_NULL;
4738   merge->coj           = PETSC_NULL;
4739   merge->owners_co     = PETSC_NULL;
4740 
4741   /* attach the supporting struct to B_mpi for reuse */
4742   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4743   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4744   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4745   *mpimat = B_mpi;
4746 
4747   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4748   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4749   PetscFunctionReturn(0);
4750 }
4751 
4752 #undef __FUNCT__
4753 #define __FUNCT__ "MatMerge_SeqsToMPI"
4754 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4755 {
4756   PetscErrorCode   ierr;
4757 
4758   PetscFunctionBegin;
4759   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4760   if (scall == MAT_INITIAL_MATRIX){
4761     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4762   }
4763   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4764   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4765   PetscFunctionReturn(0);
4766 }
4767 
4768 #undef __FUNCT__
4769 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4770 /*@
4771      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4772           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4773           with MatGetSize()
4774 
4775     Not Collective
4776 
4777    Input Parameters:
4778 +    A - the matrix
4779 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4780 
4781    Output Parameter:
4782 .    A_loc - the local sequential matrix generated
4783 
4784     Level: developer
4785 
4786 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4787 
4788 @*/
4789 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4790 {
4791   PetscErrorCode  ierr;
4792   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4793   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4794   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4795   MatScalar       *aa=a->a,*ba=b->a,*cam;
4796   PetscScalar     *ca;
4797   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4798   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4799   PetscBool       match;
4800 
4801   PetscFunctionBegin;
4802   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4803   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4804   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4805   if (scall == MAT_INITIAL_MATRIX){
4806     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4807     ci[0] = 0;
4808     for (i=0; i<am; i++){
4809       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4810     }
4811     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4812     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4813     k = 0;
4814     for (i=0; i<am; i++) {
4815       ncols_o = bi[i+1] - bi[i];
4816       ncols_d = ai[i+1] - ai[i];
4817       /* off-diagonal portion of A */
4818       for (jo=0; jo<ncols_o; jo++) {
4819         col = cmap[*bj];
4820         if (col >= cstart) break;
4821         cj[k]   = col; bj++;
4822         ca[k++] = *ba++;
4823       }
4824       /* diagonal portion of A */
4825       for (j=0; j<ncols_d; j++) {
4826         cj[k]   = cstart + *aj++;
4827         ca[k++] = *aa++;
4828       }
4829       /* off-diagonal portion of A */
4830       for (j=jo; j<ncols_o; j++) {
4831         cj[k]   = cmap[*bj++];
4832         ca[k++] = *ba++;
4833       }
4834     }
4835     /* put together the new matrix */
4836     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4837     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4838     /* Since these are PETSc arrays, change flags to free them as necessary. */
4839     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4840     mat->free_a  = PETSC_TRUE;
4841     mat->free_ij = PETSC_TRUE;
4842     mat->nonew   = 0;
4843   } else if (scall == MAT_REUSE_MATRIX){
4844     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4845     ci = mat->i; cj = mat->j; cam = mat->a;
4846     for (i=0; i<am; i++) {
4847       /* off-diagonal portion of A */
4848       ncols_o = bi[i+1] - bi[i];
4849       for (jo=0; jo<ncols_o; jo++) {
4850         col = cmap[*bj];
4851         if (col >= cstart) break;
4852         *cam++ = *ba++; bj++;
4853       }
4854       /* diagonal portion of A */
4855       ncols_d = ai[i+1] - ai[i];
4856       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4857       /* off-diagonal portion of A */
4858       for (j=jo; j<ncols_o; j++) {
4859         *cam++ = *ba++; bj++;
4860       }
4861     }
4862   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4863   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4864   PetscFunctionReturn(0);
4865 }
4866 
4867 #undef __FUNCT__
4868 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4869 /*@C
4870      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4871 
4872     Not Collective
4873 
4874    Input Parameters:
4875 +    A - the matrix
4876 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4877 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4878 
4879    Output Parameter:
4880 .    A_loc - the local sequential matrix generated
4881 
4882     Level: developer
4883 
4884 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4885 
4886 @*/
4887 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4888 {
4889   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4890   PetscErrorCode    ierr;
4891   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4892   IS                isrowa,iscola;
4893   Mat               *aloc;
4894   PetscBool       match;
4895 
4896   PetscFunctionBegin;
4897   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4898   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4899   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4900   if (!row){
4901     start = A->rmap->rstart; end = A->rmap->rend;
4902     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4903   } else {
4904     isrowa = *row;
4905   }
4906   if (!col){
4907     start = A->cmap->rstart;
4908     cmap  = a->garray;
4909     nzA   = a->A->cmap->n;
4910     nzB   = a->B->cmap->n;
4911     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4912     ncols = 0;
4913     for (i=0; i<nzB; i++) {
4914       if (cmap[i] < start) idx[ncols++] = cmap[i];
4915       else break;
4916     }
4917     imark = i;
4918     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4919     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4920     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4921   } else {
4922     iscola = *col;
4923   }
4924   if (scall != MAT_INITIAL_MATRIX){
4925     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4926     aloc[0] = *A_loc;
4927   }
4928   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4929   *A_loc = aloc[0];
4930   ierr = PetscFree(aloc);CHKERRQ(ierr);
4931   if (!row){
4932     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4933   }
4934   if (!col){
4935     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4936   }
4937   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4938   PetscFunctionReturn(0);
4939 }
4940 
4941 #undef __FUNCT__
4942 #define __FUNCT__ "MatGetBrowsOfAcols"
4943 /*@C
4944     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4945 
4946     Collective on Mat
4947 
4948    Input Parameters:
4949 +    A,B - the matrices in mpiaij format
4950 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4951 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4952 
4953    Output Parameter:
4954 +    rowb, colb - index sets of rows and columns of B to extract
4955 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4956 -    B_seq - the sequential matrix generated
4957 
4958     Level: developer
4959 
4960 @*/
4961 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4962 {
4963   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4964   PetscErrorCode    ierr;
4965   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4966   IS                isrowb,iscolb;
4967   Mat               *bseq;
4968 
4969   PetscFunctionBegin;
4970   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4971     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4972   }
4973   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4974 
4975   if (scall == MAT_INITIAL_MATRIX){
4976     start = A->cmap->rstart;
4977     cmap  = a->garray;
4978     nzA   = a->A->cmap->n;
4979     nzB   = a->B->cmap->n;
4980     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4981     ncols = 0;
4982     for (i=0; i<nzB; i++) {  /* row < local row index */
4983       if (cmap[i] < start) idx[ncols++] = cmap[i];
4984       else break;
4985     }
4986     imark = i;
4987     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4988     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4989     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4990     *brstart = imark;
4991     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4992   } else {
4993     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4994     isrowb = *rowb; iscolb = *colb;
4995     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4996     bseq[0] = *B_seq;
4997   }
4998   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4999   *B_seq = bseq[0];
5000   ierr = PetscFree(bseq);CHKERRQ(ierr);
5001   if (!rowb){
5002     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5003   } else {
5004     *rowb = isrowb;
5005   }
5006   if (!colb){
5007     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5008   } else {
5009     *colb = iscolb;
5010   }
5011   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5012   PetscFunctionReturn(0);
5013 }
5014 
5015 #undef __FUNCT__
5016 #define __FUNCT__ "MatGetBrowsOfAoCols"
5017 /*@C
5018     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5019     of the OFF-DIAGONAL portion of local A
5020 
5021     Collective on Mat
5022 
5023    Input Parameters:
5024 +    A,B - the matrices in mpiaij format
5025 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5026 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5027 .    startsj_r - similar to startsj for receives
5028 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5029 
5030    Output Parameter:
5031 +    B_oth - the sequential matrix generated
5032 
5033     Level: developer
5034 
5035 @*/
5036 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5037 {
5038   VecScatter_MPI_General *gen_to,*gen_from;
5039   PetscErrorCode         ierr;
5040   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5041   Mat_SeqAIJ             *b_oth;
5042   VecScatter             ctx=a->Mvctx;
5043   MPI_Comm               comm=((PetscObject)ctx)->comm;
5044   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5045   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5046   PetscScalar            *rvalues,*svalues;
5047   MatScalar              *b_otha,*bufa,*bufA;
5048   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5049   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5050   MPI_Status             *sstatus,rstatus;
5051   PetscMPIInt            jj;
5052   PetscInt               *cols,sbs,rbs;
5053   PetscScalar            *vals;
5054 
5055   PetscFunctionBegin;
5056   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5057     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5058   }
5059   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5060   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5061 
5062   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5063   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5064   rvalues  = gen_from->values; /* holds the length of receiving row */
5065   svalues  = gen_to->values;   /* holds the length of sending row */
5066   nrecvs   = gen_from->n;
5067   nsends   = gen_to->n;
5068 
5069   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5070   srow     = gen_to->indices;   /* local row index to be sent */
5071   sstarts  = gen_to->starts;
5072   sprocs   = gen_to->procs;
5073   sstatus  = gen_to->sstatus;
5074   sbs      = gen_to->bs;
5075   rstarts  = gen_from->starts;
5076   rprocs   = gen_from->procs;
5077   rbs      = gen_from->bs;
5078 
5079   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5080   if (scall == MAT_INITIAL_MATRIX){
5081     /* i-array */
5082     /*---------*/
5083     /*  post receives */
5084     for (i=0; i<nrecvs; i++){
5085       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5086       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5087       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5088     }
5089 
5090     /* pack the outgoing message */
5091     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5092     sstartsj[0] = 0;  rstartsj[0] = 0;
5093     len = 0; /* total length of j or a array to be sent */
5094     k = 0;
5095     for (i=0; i<nsends; i++){
5096       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5097       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5098       for (j=0; j<nrows; j++) {
5099         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5100         for (l=0; l<sbs; l++){
5101           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5102           rowlen[j*sbs+l] = ncols;
5103           len += ncols;
5104           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5105         }
5106         k++;
5107       }
5108       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5109       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5110     }
5111     /* recvs and sends of i-array are completed */
5112     i = nrecvs;
5113     while (i--) {
5114       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5115     }
5116     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5117 
5118     /* allocate buffers for sending j and a arrays */
5119     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5120     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5121 
5122     /* create i-array of B_oth */
5123     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5124     b_othi[0] = 0;
5125     len = 0; /* total length of j or a array to be received */
5126     k = 0;
5127     for (i=0; i<nrecvs; i++){
5128       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5129       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5130       for (j=0; j<nrows; j++) {
5131         b_othi[k+1] = b_othi[k] + rowlen[j];
5132         len += rowlen[j]; k++;
5133       }
5134       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5135     }
5136 
5137     /* allocate space for j and a arrrays of B_oth */
5138     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5139     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5140 
5141     /* j-array */
5142     /*---------*/
5143     /*  post receives of j-array */
5144     for (i=0; i<nrecvs; i++){
5145       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5146       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5147     }
5148 
5149     /* pack the outgoing message j-array */
5150     k = 0;
5151     for (i=0; i<nsends; i++){
5152       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5153       bufJ = bufj+sstartsj[i];
5154       for (j=0; j<nrows; j++) {
5155         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5156         for (ll=0; ll<sbs; ll++){
5157           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5158           for (l=0; l<ncols; l++){
5159             *bufJ++ = cols[l];
5160           }
5161           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5162         }
5163       }
5164       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5165     }
5166 
5167     /* recvs and sends of j-array are completed */
5168     i = nrecvs;
5169     while (i--) {
5170       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5171     }
5172     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5173   } else if (scall == MAT_REUSE_MATRIX){
5174     sstartsj = *startsj;
5175     rstartsj = *startsj_r;
5176     bufa     = *bufa_ptr;
5177     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5178     b_otha   = b_oth->a;
5179   } else {
5180     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5181   }
5182 
5183   /* a-array */
5184   /*---------*/
5185   /*  post receives of a-array */
5186   for (i=0; i<nrecvs; i++){
5187     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5188     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5189   }
5190 
5191   /* pack the outgoing message a-array */
5192   k = 0;
5193   for (i=0; i<nsends; i++){
5194     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5195     bufA = bufa+sstartsj[i];
5196     for (j=0; j<nrows; j++) {
5197       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5198       for (ll=0; ll<sbs; ll++){
5199         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5200         for (l=0; l<ncols; l++){
5201           *bufA++ = vals[l];
5202         }
5203         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5204       }
5205     }
5206     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5207   }
5208   /* recvs and sends of a-array are completed */
5209   i = nrecvs;
5210   while (i--) {
5211     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5212   }
5213   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5214   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5215 
5216   if (scall == MAT_INITIAL_MATRIX){
5217     /* put together the new matrix */
5218     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5219 
5220     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5221     /* Since these are PETSc arrays, change flags to free them as necessary. */
5222     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5223     b_oth->free_a  = PETSC_TRUE;
5224     b_oth->free_ij = PETSC_TRUE;
5225     b_oth->nonew   = 0;
5226 
5227     ierr = PetscFree(bufj);CHKERRQ(ierr);
5228     if (!startsj || !bufa_ptr){
5229       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5230       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5231     } else {
5232       *startsj   = sstartsj;
5233       *startsj_r = rstartsj;
5234       *bufa_ptr  = bufa;
5235     }
5236   }
5237   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5238   PetscFunctionReturn(0);
5239 }
5240 
5241 #undef __FUNCT__
5242 #define __FUNCT__ "MatGetCommunicationStructs"
5243 /*@C
5244   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5245 
5246   Not Collective
5247 
5248   Input Parameters:
5249 . A - The matrix in mpiaij format
5250 
5251   Output Parameter:
5252 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5253 . colmap - A map from global column index to local index into lvec
5254 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5255 
5256   Level: developer
5257 
5258 @*/
5259 #if defined (PETSC_USE_CTABLE)
5260 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5261 #else
5262 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5263 #endif
5264 {
5265   Mat_MPIAIJ *a;
5266 
5267   PetscFunctionBegin;
5268   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5269   PetscValidPointer(lvec, 2);
5270   PetscValidPointer(colmap, 3);
5271   PetscValidPointer(multScatter, 4);
5272   a = (Mat_MPIAIJ *) A->data;
5273   if (lvec) *lvec = a->lvec;
5274   if (colmap) *colmap = a->colmap;
5275   if (multScatter) *multScatter = a->Mvctx;
5276   PetscFunctionReturn(0);
5277 }
5278 
5279 EXTERN_C_BEGIN
5280 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5281 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5282 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5283 EXTERN_C_END
5284 
5285 #undef __FUNCT__
5286 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5287 /*
5288     Computes (B'*A')' since computing B*A directly is untenable
5289 
5290                n                       p                          p
5291         (              )       (              )         (                  )
5292       m (      A       )  *  n (       B      )   =   m (         C        )
5293         (              )       (              )         (                  )
5294 
5295 */
5296 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5297 {
5298   PetscErrorCode     ierr;
5299   Mat                At,Bt,Ct;
5300 
5301   PetscFunctionBegin;
5302   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5303   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5304   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5305   ierr = MatDestroy(&At);CHKERRQ(ierr);
5306   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5307   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5308   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5309   PetscFunctionReturn(0);
5310 }
5311 
5312 #undef __FUNCT__
5313 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5314 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5315 {
5316   PetscErrorCode ierr;
5317   PetscInt       m=A->rmap->n,n=B->cmap->n;
5318   Mat            Cmat;
5319 
5320   PetscFunctionBegin;
5321   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5322   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5323   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5324   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5325   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5326   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5327   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5328   *C   = Cmat;
5329   PetscFunctionReturn(0);
5330 }
5331 
5332 /* ----------------------------------------------------------------*/
5333 #undef __FUNCT__
5334 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5335 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5336 {
5337   PetscErrorCode ierr;
5338 
5339   PetscFunctionBegin;
5340   if (scall == MAT_INITIAL_MATRIX){
5341     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5342   }
5343   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5344   PetscFunctionReturn(0);
5345 }
5346 
5347 EXTERN_C_BEGIN
5348 #if defined(PETSC_HAVE_MUMPS)
5349 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5350 #endif
5351 #if defined(PETSC_HAVE_PASTIX)
5352 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5353 #endif
5354 #if defined(PETSC_HAVE_SUPERLU_DIST)
5355 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5356 #endif
5357 #if defined(PETSC_HAVE_SPOOLES)
5358 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5359 #endif
5360 EXTERN_C_END
5361 
5362 /*MC
5363    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5364 
5365    Options Database Keys:
5366 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5367 
5368   Level: beginner
5369 
5370 .seealso: MatCreateMPIAIJ()
5371 M*/
5372 
5373 EXTERN_C_BEGIN
5374 #undef __FUNCT__
5375 #define __FUNCT__ "MatCreate_MPIAIJ"
5376 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5377 {
5378   Mat_MPIAIJ     *b;
5379   PetscErrorCode ierr;
5380   PetscMPIInt    size;
5381 
5382   PetscFunctionBegin;
5383   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5384 
5385   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5386   B->data         = (void*)b;
5387   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5388   B->rmap->bs     = 1;
5389   B->assembled    = PETSC_FALSE;
5390 
5391   B->insertmode   = NOT_SET_VALUES;
5392   b->size         = size;
5393   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5394 
5395   /* build cache for off array entries formed */
5396   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5397   b->donotstash  = PETSC_FALSE;
5398   b->colmap      = 0;
5399   b->garray      = 0;
5400   b->roworiented = PETSC_TRUE;
5401 
5402   /* stuff used for matrix vector multiply */
5403   b->lvec      = PETSC_NULL;
5404   b->Mvctx     = PETSC_NULL;
5405 
5406   /* stuff for MatGetRow() */
5407   b->rowindices   = 0;
5408   b->rowvalues    = 0;
5409   b->getrowactive = PETSC_FALSE;
5410 
5411 #if defined(PETSC_HAVE_SPOOLES)
5412   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5413                                      "MatGetFactor_mpiaij_spooles",
5414                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5415 #endif
5416 #if defined(PETSC_HAVE_MUMPS)
5417   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5418                                      "MatGetFactor_aij_mumps",
5419                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5420 #endif
5421 #if defined(PETSC_HAVE_PASTIX)
5422   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5423 					   "MatGetFactor_mpiaij_pastix",
5424 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5425 #endif
5426 #if defined(PETSC_HAVE_SUPERLU_DIST)
5427   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5428                                      "MatGetFactor_mpiaij_superlu_dist",
5429                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5430 #endif
5431   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5432                                      "MatStoreValues_MPIAIJ",
5433                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5434   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5435                                      "MatRetrieveValues_MPIAIJ",
5436                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5437   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5438 				     "MatGetDiagonalBlock_MPIAIJ",
5439                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5440   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5441 				     "MatIsTranspose_MPIAIJ",
5442 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5443   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5444 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5445 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5446   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5447 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5448 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5449   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5450 				     "MatDiagonalScaleLocal_MPIAIJ",
5451 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5452   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5453                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5454                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5455   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5456                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5457                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5458   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5459                                      "MatConvert_MPIAIJ_MPISBAIJ",
5460                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5461   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5462                                      "MatMatMult_MPIDense_MPIAIJ",
5463                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5464   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5465                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5466                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5467   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5468                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5469                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5470   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5471   PetscFunctionReturn(0);
5472 }
5473 EXTERN_C_END
5474 
5475 #undef __FUNCT__
5476 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5477 /*@
5478      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5479          and "off-diagonal" part of the matrix in CSR format.
5480 
5481    Collective on MPI_Comm
5482 
5483    Input Parameters:
5484 +  comm - MPI communicator
5485 .  m - number of local rows (Cannot be PETSC_DECIDE)
5486 .  n - This value should be the same as the local size used in creating the
5487        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5488        calculated if N is given) For square matrices n is almost always m.
5489 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5490 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5491 .   i - row indices for "diagonal" portion of matrix
5492 .   j - column indices
5493 .   a - matrix values
5494 .   oi - row indices for "off-diagonal" portion of matrix
5495 .   oj - column indices
5496 -   oa - matrix values
5497 
5498    Output Parameter:
5499 .   mat - the matrix
5500 
5501    Level: advanced
5502 
5503    Notes:
5504        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5505        must free the arrays once the matrix has been destroyed and not before.
5506 
5507        The i and j indices are 0 based
5508 
5509        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5510 
5511        This sets local rows and cannot be used to set off-processor values.
5512 
5513        You cannot later use MatSetValues() to change values in this matrix.
5514 
5515 .keywords: matrix, aij, compressed row, sparse, parallel
5516 
5517 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5518           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5519 @*/
5520 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5521 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5522 {
5523   PetscErrorCode ierr;
5524   Mat_MPIAIJ     *maij;
5525 
5526  PetscFunctionBegin;
5527   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5528   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5529   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5530   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5531   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5532   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5533   maij = (Mat_MPIAIJ*) (*mat)->data;
5534   maij->donotstash     = PETSC_TRUE;
5535   (*mat)->preallocated = PETSC_TRUE;
5536 
5537   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5538   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5539   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5540   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5541 
5542   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5543   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5544 
5545   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5546   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5547   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5548   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5549 
5550   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5551   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5552   PetscFunctionReturn(0);
5553 }
5554 
5555 /*
5556     Special version for direct calls from Fortran
5557 */
5558 #include <private/fortranimpl.h>
5559 
5560 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5561 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5562 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5563 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5564 #endif
5565 
5566 /* Change these macros so can be used in void function */
5567 #undef CHKERRQ
5568 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5569 #undef SETERRQ2
5570 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5571 #undef SETERRQ
5572 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5573 
5574 EXTERN_C_BEGIN
5575 #undef __FUNCT__
5576 #define __FUNCT__ "matsetvaluesmpiaij_"
5577 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5578 {
5579   Mat             mat = *mmat;
5580   PetscInt        m = *mm, n = *mn;
5581   InsertMode      addv = *maddv;
5582   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5583   PetscScalar     value;
5584   PetscErrorCode  ierr;
5585 
5586   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5587   if (mat->insertmode == NOT_SET_VALUES) {
5588     mat->insertmode = addv;
5589   }
5590 #if defined(PETSC_USE_DEBUG)
5591   else if (mat->insertmode != addv) {
5592     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5593   }
5594 #endif
5595   {
5596   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5597   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5598   PetscBool       roworiented = aij->roworiented;
5599 
5600   /* Some Variables required in the macro */
5601   Mat             A = aij->A;
5602   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5603   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5604   MatScalar       *aa = a->a;
5605   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5606   Mat             B = aij->B;
5607   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5608   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5609   MatScalar       *ba = b->a;
5610 
5611   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5612   PetscInt        nonew = a->nonew;
5613   MatScalar       *ap1,*ap2;
5614 
5615   PetscFunctionBegin;
5616   for (i=0; i<m; i++) {
5617     if (im[i] < 0) continue;
5618 #if defined(PETSC_USE_DEBUG)
5619     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5620 #endif
5621     if (im[i] >= rstart && im[i] < rend) {
5622       row      = im[i] - rstart;
5623       lastcol1 = -1;
5624       rp1      = aj + ai[row];
5625       ap1      = aa + ai[row];
5626       rmax1    = aimax[row];
5627       nrow1    = ailen[row];
5628       low1     = 0;
5629       high1    = nrow1;
5630       lastcol2 = -1;
5631       rp2      = bj + bi[row];
5632       ap2      = ba + bi[row];
5633       rmax2    = bimax[row];
5634       nrow2    = bilen[row];
5635       low2     = 0;
5636       high2    = nrow2;
5637 
5638       for (j=0; j<n; j++) {
5639         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5640         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5641         if (in[j] >= cstart && in[j] < cend){
5642           col = in[j] - cstart;
5643           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5644         } else if (in[j] < 0) continue;
5645 #if defined(PETSC_USE_DEBUG)
5646         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5647 #endif
5648         else {
5649           if (mat->was_assembled) {
5650             if (!aij->colmap) {
5651               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5652             }
5653 #if defined (PETSC_USE_CTABLE)
5654             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5655 	    col--;
5656 #else
5657             col = aij->colmap[in[j]] - 1;
5658 #endif
5659             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5660               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5661               col =  in[j];
5662               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5663               B = aij->B;
5664               b = (Mat_SeqAIJ*)B->data;
5665               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5666               rp2      = bj + bi[row];
5667               ap2      = ba + bi[row];
5668               rmax2    = bimax[row];
5669               nrow2    = bilen[row];
5670               low2     = 0;
5671               high2    = nrow2;
5672               bm       = aij->B->rmap->n;
5673               ba = b->a;
5674             }
5675           } else col = in[j];
5676           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5677         }
5678       }
5679     } else {
5680       if (!aij->donotstash) {
5681         if (roworiented) {
5682           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5683         } else {
5684           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5685         }
5686       }
5687     }
5688   }}
5689   PetscFunctionReturnVoid();
5690 }
5691 EXTERN_C_END
5692 
5693