#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
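/*
   Usage sketch (illustrative, not from this file): rank 0 assembles a
   MATSEQAIJ matrix gA of global size N, every rank picks a local row count m,
   and the matrix is spread across comm. PetscSplitOwnership() is one way to
   pick m; any choice whose sum over the ranks equals N works, since gmat is
   only dereferenced on rank 0.

      Mat      gA = PETSC_NULL,dA;
      PetscInt m = PETSC_DECIDE;
      if (!rank) { build gA as a MATSEQAIJ of size N by N }
      ierr = PetscSplitOwnership(comm,&m,&N);CHKERRQ(ierr);
      ierr = MatDistribute_MPIAIJ(comm,gA,m,MAT_INITIAL_MATRIX,&dA);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX only the numerical values are re-sent from rank 0
   into the existing dA.
*/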
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to the other processes */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to the other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to the other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat   = *inmat;
    ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to the other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each process
has an order-N integer array, but access is fast).
*/
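/*
   Lookup sketch (illustrative): translating a global column index gcol into
   the local off-diagonal index lcol, mirroring the use in MatSetValues_MPIAIJ()
   below. The +1/-1 shift lets zero mean "column not present in B".

      #if defined(PETSC_USE_CTABLE)
        ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
        lcol--;
      #else
        lcol = aij->colmap[gcol] - 1;
      #endif
*/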
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

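/*
   The two macros below insert one value into a row of the diagonal (A) or
   off-diagonal (B) SeqAIJ block. They narrow the search window [low,high)
   by bisection while it holds more than 5 entries, finish with a linear scan,
   and on a miss shift the tail of the row up one slot to make room,
   reallocating via MatSeqXAIJReallocateAIJ() if the row is full.
*/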
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
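/*
   Layout assumed for v above (deduced from the three copies): the caller
   supplies the entire local row with columns in increasing global order, so
   v = [B entries left of the diagonal block | all A entries | B entries right
   of the diagonal block].
*/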

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
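/*
   Typical call pattern (illustrative; ncols, cols and vals are placeholders):

      for (i=rstart; i<rend; i++) {
        ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Locally owned rows go straight into the A or B block above; rows owned
   elsewhere are stashed and shipped during assembly (see
   MatAssemblyBegin/End_MPIAIJ below).
*/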

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processes are in either INSERT_VALUES or ADD_VALUES mode; since the
     two modes have distinct bit values, the bitwise OR across processes equals
     ADD_VALUES|INSERT_VALUES exactly when both modes are in use */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, thus calling lots of mallocs and
     slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
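/*
   Example (illustrative): zeroing boundary rows and keeping a unit diagonal,

      ierr = MatZeroRows(A,nb,brows,1.0);CHKERRQ(ierr);

   where nb and brows are placeholders for each process's list of global row
   numbers; the rows need not be locally owned, since the list is routed to
   the owning processes by the send/receive machinery above.
*/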

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
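/*
   Note on the structure above: the scatter that gathers the ghost entries of
   xx into a->lvec is overlapped with the local product, so effectively
   yy = A_diag*xx + B_off*lvec with the communication hidden behind the first
   multiply.
*/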

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a call to MatGetSubMatrices(). */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr); /* the index array was copied into Notme, so it can be freed */

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest local nz count */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
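/*
   File layout produced above (PETSc binary matrix format): a 4-entry header
   {MAT_FILE_CLASSID, M, N, total nz}, then all row lengths, then all column
   indices (global, in row order, with the diagonal-block entries spliced
   between the left and right off-diagonal pieces), then the matching values.
*/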

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar   *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject)mat)->prefix,"-mat_ascii_output_large",&flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    /* shift the local column indices to global ones, then shift them back below */
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols; /* ct keeps the malloc'ed base pointer since cols is advanced below */
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

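/*
   Scheme used below for the SOR_LOCAL_* variants: each outer iteration
   scatters the current xx into mat->lvec, forms bb1 = bb - B*lvec so the
   off-process coupling moves into the right-hand side, then runs the
   sequential SOR kernel of the diagonal block on (bb1,xx). The SOR_EISENSTAT
   branch combines a backward and a forward local sweep with a diagonal
   correction of the right-hand side.
*/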
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatPermute_MPIAIJ"
1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1306 {
1307   MPI_Comm       comm,pcomm;
1308   PetscInt       first,local_size,nrows;
1309   const PetscInt *rows;
1310   PetscMPIInt    size;
1311   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1316   /* make a collective version of 'rowp' */
1317   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1318   if (pcomm==comm) {
1319     crowp = rowp;
1320   } else {
1321     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1322     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1323     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1324     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1325   }
1326   /* collect the global row permutation and invert it */
1327   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1328   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1329   if (pcomm!=comm) {
1330     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1331   }
1332   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1333   /* get the local target indices */
1334   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1335   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1336   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1337   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1338   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1339   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1340   /* the column permutation is so much easier;
1341      make a local version of 'colp' and invert it */
1342   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1343   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1344   if (size==1) {
1345     lcolp = colp;
1346   } else {
1347     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1348     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1349     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1350   }
1351   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1352   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1353   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1354   if (size>1) {
1355     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1357   }
1358   /* now we just get the submatrix */
1359   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1360   /* clean up */
1361   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1362   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369 {
1370   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1371   Mat            A = mat->A,B = mat->B;
1372   PetscErrorCode ierr;
1373   PetscReal      isend[5],irecv[5];
1374 
1375   PetscFunctionBegin;
1376   info->block_size     = 1.0;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   }
1404   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1405   info->fill_ratio_needed = 0;
1406   info->factor_mallocs    = 0;
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatSetOption_MPIAIJ"
1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1414 {
1415   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   switch (op) {
1420   case MAT_NEW_NONZERO_LOCATIONS:
1421   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1422   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1423   case MAT_KEEP_NONZERO_PATTERN:
1424   case MAT_NEW_NONZERO_LOCATION_ERR:
1425   case MAT_USE_INODES:
1426   case MAT_IGNORE_ZERO_ENTRIES:
1427     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1428     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1429     break;
1430   case MAT_ROW_ORIENTED:
1431     a->roworiented = flg;
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1434     break;
1435   case MAT_NEW_DIAGONALS:
1436     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1437     break;
1438   case MAT_IGNORE_OFF_PROC_ENTRIES:
1439     a->donotstash = PETSC_TRUE;
1440     break;
1441   case MAT_SYMMETRIC:
1442     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1443     break;
1444   case MAT_STRUCTURALLY_SYMMETRIC:
1445     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1446     break;
1447   case MAT_HERMITIAN:
1448     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1449     break;
1450   case MAT_SYMMETRY_ETERNAL:
1451     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1452     break;
1453   default:
1454     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatGetRow_MPIAIJ"
1461 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1462 {
1463   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1464   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1465   PetscErrorCode ierr;
1466   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1467   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1468   PetscInt       *cmap,*idx_p;
1469 
1470   PetscFunctionBegin;
1471   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1472   mat->getrowactive = PETSC_TRUE;
1473 
1474   if (!mat->rowvalues && (idx || v)) {
1475     /*
1476         allocate enough space to hold information from the longest row.
1477     */
1478     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1479     PetscInt   max = 1,tmp;
1480     for (i=0; i<matin->rmap->n; i++) {
1481       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1482       if (max < tmp) { max = tmp; }
1483     }
1484     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1485   }
1486 
1487   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1488   lrow = row - rstart;
1489 
1490   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1491   if (!v)   {pvA = 0; pvB = 0;}
1492   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1493   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1494   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1495   nztot = nzA + nzB;
1496 
1497   cmap  = mat->garray;
1498   if (v  || idx) {
1499     if (nztot) {
1500       /* Sort by increasing column numbers, assuming A and B already sorted */
1501       PetscInt imark = -1;
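           /* imark counts the leading off-diagonal (B) entries whose global
              column precedes the diagonal block; the merged row is emitted as
              [B entries < cstart | all A entries | B entries >= cend] */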
1502       if (v) {
1503         *v = v_p = mat->rowvalues;
1504         for (i=0; i<nzB; i++) {
1505           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1506           else break;
1507         }
1508         imark = i;
1509         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1510         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1511       }
1512       if (idx) {
1513         *idx = idx_p = mat->rowindices;
1514         if (imark > -1) {
1515           for (i=0; i<imark; i++) {
1516             idx_p[i] = cmap[cworkB[i]];
1517           }
1518         } else {
1519           for (i=0; i<nzB; i++) {
1520             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1521             else break;
1522           }
1523           imark = i;
1524         }
1525         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1526         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1527       }
1528     } else {
1529       if (idx) *idx = 0;
1530       if (v)   *v   = 0;
1531     }
1532   }
1533   *nz = nztot;
1534   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1535   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1541 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1542 {
1543   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1544 
1545   PetscFunctionBegin;
1546   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1547   aij->getrowactive = PETSC_FALSE;
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatNorm_MPIAIJ"
1553 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1554 {
1555   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1556   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1557   PetscErrorCode ierr;
1558   PetscInt       i,j,cstart = mat->cmap->rstart;
1559   PetscReal      sum = 0.0;
1560   MatScalar      *v;
1561 
1562   PetscFunctionBegin;
1563   if (aij->size == 1) {
1564     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1565   } else {
1566     if (type == NORM_FROBENIUS) {
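           /* ||A||_F^2 = sum_ij |a_ij|^2: add the squares of the local entries
              of both blocks, MPI_SUM-reduce across processes, take the root */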
1567       v = amat->a;
1568       for (i=0; i<amat->nz; i++) {
1569 #if defined(PETSC_USE_COMPLEX)
1570         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1571 #else
1572         sum += (*v)*(*v); v++;
1573 #endif
1574       }
1575       v = bmat->a;
1576       for (i=0; i<bmat->nz; i++) {
1577 #if defined(PETSC_USE_COMPLEX)
1578         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1579 #else
1580         sum += (*v)*(*v); v++;
1581 #endif
1582       }
1583       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1584       *norm = sqrt(*norm);
1585     } else if (type == NORM_1) { /* max column norm */
1586       PetscReal *tmp,*tmp2;
1587       PetscInt  *jj,*garray = aij->garray;
1588       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1589       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1590       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1591       *norm = 0.0;
1592       v = amat->a; jj = amat->j;
1593       for (j=0; j<amat->nz; j++) {
1594         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1595       }
1596       v = bmat->a; jj = bmat->j;
1597       for (j=0; j<bmat->nz; j++) {
1598         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1599       }
1600       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1601       for (j=0; j<mat->cmap->N; j++) {
1602         if (tmp2[j] > *norm) *norm = tmp2[j];
1603       }
1604       ierr = PetscFree(tmp);CHKERRQ(ierr);
1605       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1606     } else if (type == NORM_INFINITY) { /* max row norm */
1607       PetscReal ntemp = 0.0;
1608       for (j=0; j<aij->A->rmap->n; j++) {
1609         v = amat->a + amat->i[j];
1610         sum = 0.0;
1611         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1612           sum += PetscAbsScalar(*v); v++;
1613         }
1614         v = bmat->a + bmat->i[j];
1615         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1616           sum += PetscAbsScalar(*v); v++;
1617         }
1618         if (sum > ntemp) ntemp = sum;
1619       }
1620       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1621     } else {
1622       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1623     }
1624   }
1625   PetscFunctionReturn(0);
1626 }
1627 
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatTranspose_MPIAIJ"
1630 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1631 {
1632   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1633   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1634   PetscErrorCode ierr;
1635   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1636   PetscInt       cstart=A->cmap->rstart,ncol;
1637   Mat            B;
1638   MatScalar      *array;
1639 
1640   PetscFunctionBegin;
1641   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1642 
1643   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1644   ai = Aloc->i; aj = Aloc->j;
1645   bi = Bloc->i; bj = Bloc->j;
1646   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1647     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1648     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1649     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1650     for (i=0; i<ai[ma]; i++){
1651       d_nnz[aj[i]] ++;
1652       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1653     }
1654 
1655     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1656     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1657     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1658     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1659     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1660   } else {
1661     B = *matout;
1662   }
1663 
1664   /* copy over the A part */
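       /* row `row' of A becomes column `row' of B = A^T: the global column
          indices aj (shifted by cstart above) are passed as the target rows of
          MatSetValues() and the original row index as the single target column */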
1665   array = Aloc->a;
1666   row = A->rmap->rstart;
1667   for (i=0; i<ma; i++) {
1668     ncol = ai[i+1]-ai[i];
1669     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1670     row++; array += ncol; aj += ncol;
1671   }
1672   aj = Aloc->j;
1673   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1674 
1675   /* copy over the B part */
1676   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1677   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1678   array = Bloc->a;
1679   row = A->rmap->rstart;
1680   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1681   cols_tmp = cols;
1682   for (i=0; i<mb; i++) {
1683     ncol = bi[i+1]-bi[i];
1684     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1685     row++; array += ncol; cols_tmp += ncol;
1686   }
1687   ierr = PetscFree(cols);CHKERRQ(ierr);
1688 
1689   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1692     *matout = B;
1693   } else {
1694     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1695   }
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1701 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1702 {
1703   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1704   Mat            a = aij->A,b = aij->B;
1705   PetscErrorCode ierr;
1706   PetscInt       s1,s2,s3;
1707 
1708   PetscFunctionBegin;
1709   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1710   if (rr) {
1711     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1712     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1713     /* Overlap communication with computation. */
1714     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1715   }
1716   if (ll) {
1717     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1718     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1719     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1720   }
1721   /* scale the diagonal block */
1722   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1723 
1724   if (rr) {
1725     /* Do a scatter end and then right scale the off-diagonal block */
1726     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1727     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1728   }
1729 
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1735 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1736 {
1737   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1738   PetscErrorCode ierr;
1739 
1740   PetscFunctionBegin;
1741   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1742   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1743   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1744   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1745   PetscFunctionReturn(0);
1746 }
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1749 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1750 {
1751   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1752   PetscErrorCode ierr;
1753 
1754   PetscFunctionBegin;
1755   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatEqual_MPIAIJ"
1761 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1762 {
1763   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1764   Mat            a,b,c,d;
1765   PetscTruth     flg;
1766   PetscErrorCode ierr;
1767 
1768   PetscFunctionBegin;
1769   a = matA->A; b = matA->B;
1770   c = matB->A; d = matB->B;
1771 
1772   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1773   if (flg) {
1774     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1775   }
1776   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatCopy_MPIAIJ"
1782 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1783 {
1784   PetscErrorCode ierr;
1785   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1786   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1787 
1788   PetscFunctionBegin;
1789   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1790   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1791     /* Because of the column compression in the off-process part of the matrix a->B,
1792        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
1793        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
1794        could be provided by first uncompressing the B matrices and then copying the
1795        submatrices. */
1796     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1797   } else {
1798     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1799     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1800   }
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1806 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1807 {
1808   PetscErrorCode ierr;
1809 
1810   PetscFunctionBegin;
1811   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatAXPY_MPIAIJ"
1817 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1818 {
1819   PetscErrorCode ierr;
1820   PetscInt       i;
1821   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1822   PetscBLASInt   bnz,one=1;
1823   Mat_SeqAIJ     *x,*y;
1824 
1825   PetscFunctionBegin;
1826   if (str == SAME_NONZERO_PATTERN) {
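         /* identical nonzero patterns make the value arrays of matching blocks
            conformal, so Y += a*X reduces to a flat BLAS axpy of length nz on
            the diagonal and off-diagonal blocks separately */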
1827     PetscScalar alpha = a;
1828     x = (Mat_SeqAIJ *)xx->A->data;
1829     y = (Mat_SeqAIJ *)yy->A->data;
1830     bnz = PetscBLASIntCast(x->nz);
1831     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1832     x = (Mat_SeqAIJ *)xx->B->data;
1833     y = (Mat_SeqAIJ *)yy->B->data;
1834     bnz = PetscBLASIntCast(x->nz);
1835     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1836   } else if (str == SUBSET_NONZERO_PATTERN) {
1837     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1838 
1839     x = (Mat_SeqAIJ *)xx->B->data;
1840     y = (Mat_SeqAIJ *)yy->B->data;
1841     if (y->xtoy && y->XtoY != xx->B) {
1842       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1843       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1844     }
1845     if (!y->xtoy) { /* get xtoy */
1846       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1847       y->XtoY = xx->B;
1848       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1849     }
1850     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1851   } else {
1852     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1853   }
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatConjugate_MPIAIJ"
1861 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1862 {
1863 #if defined(PETSC_USE_COMPLEX)
1864   PetscErrorCode ierr;
1865   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1866 
1867   PetscFunctionBegin;
1868   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1869   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1870 #else
1871   PetscFunctionBegin;
1872 #endif
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatRealPart_MPIAIJ"
1878 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1879 {
1880   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1881   PetscErrorCode ierr;
1882 
1883   PetscFunctionBegin;
1884   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1885   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 #undef __FUNCT__
1890 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1891 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1892 {
1893   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1894   PetscErrorCode ierr;
1895 
1896   PetscFunctionBegin;
1897   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1898   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 #ifdef PETSC_HAVE_PBGL
1903 
1904 #include <boost/parallel/mpi/bsp_process_group.hpp>
1905 #include <boost/graph/distributed/ilu_default_graph.hpp>
1906 #include <boost/graph/distributed/ilu_0_block.hpp>
1907 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1908 #include <boost/graph/distributed/petsc/interface.hpp>
1909 #include <boost/multi_array.hpp>
1910 #include <boost/parallel/distributed_property_map.hpp>
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1914 /*
1915   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1916 */
1917 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1918 {
1919   namespace petsc = boost::distributed::petsc;
1920 
1921   namespace graph_dist = boost::graph::distributed;
1922   using boost::graph::distributed::ilu_default::process_group_type;
1923   using boost::graph::ilu_permuted;
1924 
1925   PetscTruth      row_identity, col_identity;
1926   PetscContainer  c;
1927   PetscInt        m, n, M, N;
1928   PetscErrorCode  ierr;
1929 
1930   PetscFunctionBegin;
1931   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1932   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1933   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1934   if (!row_identity || !col_identity) {
1935     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1936   }
1937 
1938   process_group_type pg;
1939   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1940   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1941   lgraph_type&   level_graph = *lgraph_p;
1942   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1943 
1944   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1945   ilu_permuted(level_graph);
1946 
1947   /* put together the new matrix */
1948   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1949   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1950   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1951   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1952   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1953   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1954   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1955 
1956   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1957   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1958   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1964 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1965 {
1966   PetscFunctionBegin;
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatSolve_MPIAIJ"
1972 /*
1973   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1974 */
1975 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1976 {
1977   namespace graph_dist = boost::graph::distributed;
1978 
1979   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1980   lgraph_type*   lgraph_p;
1981   PetscContainer c;
1982   PetscErrorCode ierr;
1983 
1984   PetscFunctionBegin;
1985   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1986   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1987   ierr = VecCopy(b, x);CHKERRQ(ierr);
1988 
1989   PetscScalar* array_x;
1990   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1991   PetscInt sx;
1992   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1993 
1994   PetscScalar* array_b;
1995   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1996   PetscInt sb;
1997   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1998 
1999   lgraph_type&   level_graph = *lgraph_p;
2000   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2001 
2002   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2003   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2004                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2005 
2006   typedef boost::iterator_property_map<array_ref_type::iterator,
2007                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2008   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2009                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2010 
2011   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2012 
       /* release the raw array access acquired with VecGetArray() above */
       ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr);
       ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr);
2013   PetscFunctionReturn(0);
2014 }
2015 #endif
2016 
2017 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2018   PetscInt       nzlocal,nsends,nrecvs;
2019   PetscMPIInt    *send_rank,*recv_rank;
2020   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2021   PetscScalar    *sbuf_a,**rbuf_a;
2022   PetscErrorCode (*MatDestroy)(Mat);
2023 } Mat_Redundant;
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2027 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2028 {
2029   PetscErrorCode       ierr;
2030   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2031   PetscInt             i;
2032 
2033   PetscFunctionBegin;
2034   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2035   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2036   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2037   for (i=0; i<redund->nrecvs; i++){
2038     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2039     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2040   }
2041   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2042   ierr = PetscFree(redund);CHKERRQ(ierr);
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatDestroy_MatRedundant"
2048 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2049 {
2050   PetscErrorCode  ierr;
2051   PetscContainer  container;
2052   Mat_Redundant   *redund=PETSC_NULL;
2053 
2054   PetscFunctionBegin;
2055   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2056   if (container) {
2057     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2058   } else {
2059     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2060   }
2061   A->ops->destroy = redund->MatDestroy;
2062   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2063   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2064   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
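     /*
        Assembles on each subcommunicator a complete (redundant) copy of the
        parallel matrix: every process packs its local rows as row pointers plus
        global column indices (sbuf_j) and values (sbuf_a), exchanges them with
        the processes owning the same row block in the other subcommunicators,
        and inserts everything with MatSetValues().  For MAT_REUSE_MATRIX the
        buffers and the communication pattern cached in the attached
        Mat_Redundant container are reused and only the values are re-sent.
     */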
2070 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2071 {
2072   PetscMPIInt    rank,size;
2073   MPI_Comm       comm=((PetscObject)mat)->comm;
2074   PetscErrorCode ierr;
2075   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2076   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2077   PetscInt       *rowrange=mat->rmap->range;
2078   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2079   Mat            A=aij->A,B=aij->B,C=*matredundant;
2080   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2081   PetscScalar    *sbuf_a;
2082   PetscInt       nzlocal=a->nz+b->nz;
2083   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2084   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2085   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2086   MatScalar      *aworkA,*aworkB;
2087   PetscScalar    *vals;
2088   PetscMPIInt    tag1,tag2,tag3,imdex;
2089   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2090                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2091   MPI_Status     recv_status,*send_status;
2092   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2093   PetscInt       **rbuf_j=PETSC_NULL;
2094   PetscScalar    **rbuf_a=PETSC_NULL;
2095   Mat_Redundant  *redund=PETSC_NULL;
2096   PetscContainer container;
2097 
2098   PetscFunctionBegin;
2099   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2100   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2101 
2102   if (reuse == MAT_REUSE_MATRIX) {
2103     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2104     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2105     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2106     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2107     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2108     if (container) {
2109       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2110     } else {
2111       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2112     }
2113     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2114 
2115     nsends    = redund->nsends;
2116     nrecvs    = redund->nrecvs;
2117     send_rank = redund->send_rank;
2118     recv_rank = redund->recv_rank;
2119     sbuf_nz   = redund->sbuf_nz;
2120     rbuf_nz   = redund->rbuf_nz;
2121     sbuf_j    = redund->sbuf_j;
2122     sbuf_a    = redund->sbuf_a;
2123     rbuf_j    = redund->rbuf_j;
2124     rbuf_a    = redund->rbuf_a;
2125   }
2126 
2127   if (reuse == MAT_INITIAL_MATRIX){
2128     PetscMPIInt  subrank,subsize;
2129     PetscInt     nleftover,np_subcomm;
2130     /* get the destination processors' id send_rank, nsends and nrecvs */
2131     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2132     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2133     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2134     np_subcomm = size/nsubcomm;
2135     nleftover  = size - nsubcomm*np_subcomm;
2136     nsends = 0; nrecvs = 0;
2137     for (i=0; i<size; i++){ /* i=rank*/
2138       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2139         send_rank[nsends] = i; nsends++;
2140         recv_rank[nrecvs++] = i;
2141       }
2142     }
2143     if (rank >= size - nleftover){/* this proc is a leftover processor */
2144       i = size-nleftover-1;
2145       j = 0;
2146       while (j < nsubcomm - nleftover){
2147         send_rank[nsends++] = i;
2148         i--; j++;
2149       }
2150     }
2151 
2152     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2153       for (i=0; i<nleftover; i++){
2154         recv_rank[nrecvs++] = size-nleftover+i;
2155       }
2156     }
2157 
2158     /* allocate sbuf_j, sbuf_a */
2159     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2160     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2161     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2162   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2163 
2164   /* copy mat's local entries into the buffers */
2165   if (reuse == MAT_INITIAL_MATRIX){
2166     rownz_max = 0;
2167     rptr = sbuf_j;
2168     cols = sbuf_j + rend-rstart + 1;
2169     vals = sbuf_a;
2170     rptr[0] = 0;
2171     for (i=0; i<rend-rstart; i++){
2172       row = i + rstart;
2173       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2174       ncols  = nzA + nzB;
2175       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2176       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2177       /* load the column indices for this row into cols */
2178       lwrite = 0;
2179       for (l=0; l<nzB; l++) {
2180         if ((ctmp = bmap[cworkB[l]]) < cstart){
2181           vals[lwrite]   = aworkB[l];
2182           cols[lwrite++] = ctmp;
2183         }
2184       }
2185       for (l=0; l<nzA; l++){
2186         vals[lwrite]   = aworkA[l];
2187         cols[lwrite++] = cstart + cworkA[l];
2188       }
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) >= cend){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       vals += ncols;
2196       cols += ncols;
2197       rptr[i+1] = rptr[i] + ncols;
2198       if (rownz_max < ncols) rownz_max = ncols;
2199     }
2200     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2201   } else { /* only copy matrix values into sbuf_a */
2202     rptr = sbuf_j;
2203     vals = sbuf_a;
2204     rptr[0] = 0;
2205     for (i=0; i<rend-rstart; i++){
2206       row = i + rstart;
2207       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2208       ncols  = nzA + nzB;
2209       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2210       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2211       lwrite = 0;
2212       for (l=0; l<nzB; l++) {
2213         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2214       }
2215       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2216       for (l=0; l<nzB; l++) {
2217         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2218       }
2219       vals += ncols;
2220       rptr[i+1] = rptr[i] + ncols;
2221     }
2222   } /* end of the MAT_REUSE_MATRIX values-only copy */
2223 
2224   /* send nzlocal to others, and recv other's nzlocal */
2225   /*--------------------------------------------------*/
2226   if (reuse == MAT_INITIAL_MATRIX){
2227     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2228     s_waits2 = s_waits3 + nsends;
2229     s_waits1 = s_waits2 + nsends;
2230     r_waits1 = s_waits1 + nsends;
2231     r_waits2 = r_waits1 + nrecvs;
2232     r_waits3 = r_waits2 + nrecvs;
2233   } else {
2234     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2235     r_waits3 = s_waits3 + nsends;
2236   }
2237 
2238   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2239   if (reuse == MAT_INITIAL_MATRIX){
2240     /* get new tags to keep the communication clean */
2241     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2242     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2243     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2244 
2245     /* post receives of other's nzlocal */
2246     for (i=0; i<nrecvs; i++){
2247       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2248     }
2249     /* send nzlocal to others */
2250     for (i=0; i<nsends; i++){
2251       sbuf_nz[i] = nzlocal;
2252       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2253     }
2254     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2255     count = nrecvs;
2256     while (count) {
2257       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2258       recv_rank[imdex] = recv_status.MPI_SOURCE;
2259       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2260       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2261 
2262       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2263       rbuf_nz[imdex] += i + 2;
2264       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2265       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2266       count--;
2267     }
2268     /* wait on sends of nzlocal */
2269     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2270     /* send mat->i,j to others, and recv from other's */
2271     /*------------------------------------------------*/
2272     for (i=0; i<nsends; i++){
2273       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2274       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2275     }
2276     /* wait on receives of mat->i,j */
2277     /*------------------------------*/
2278     count = nrecvs;
2279     while (count) {
2280       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2281       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2282       count--;
2283     }
2284     /* wait on sends of mat->i,j */
2285     /*---------------------------*/
2286     if (nsends) {
2287       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2288     }
2289   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2290 
2291   /* post receives, send and receive mat->a */
2292   /*----------------------------------------*/
2293   for (imdex=0; imdex<nrecvs; imdex++) {
2294     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2295   }
2296   for (i=0; i<nsends; i++){
2297     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2298   }
2299   count = nrecvs;
2300   while (count) {
2301     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2302     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2303     count--;
2304   }
2305   if (nsends) {
2306     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2307   }
2308 
2309   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2310 
2311   /* create redundant matrix */
2312   /*-------------------------*/
2313   if (reuse == MAT_INITIAL_MATRIX){
2314     /* compute rownz_max for preallocation */
2315     for (imdex=0; imdex<nrecvs; imdex++){
2316       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2317       rptr = rbuf_j[imdex];
2318       for (i=0; i<j; i++){
2319         ncols = rptr[i+1] - rptr[i];
2320         if (rownz_max < ncols) rownz_max = ncols;
2321       }
2322     }
2323 
2324     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2325     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2326     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2327     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2328     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2329   } else {
2330     C = *matredundant;
2331   }
2332 
2333   /* insert local matrix entries */
2334   rptr = sbuf_j;
2335   cols = sbuf_j + rend-rstart + 1;
2336   vals = sbuf_a;
2337   for (i=0; i<rend-rstart; i++){
2338     row   = i + rstart;
2339     ncols = rptr[i+1] - rptr[i];
2340     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2341     vals += ncols;
2342     cols += ncols;
2343   }
2344   /* insert received matrix entries */
2345   for (imdex=0; imdex<nrecvs; imdex++){
2346     rstart = rowrange[recv_rank[imdex]];
2347     rend   = rowrange[recv_rank[imdex]+1];
2348     rptr = rbuf_j[imdex];
2349     cols = rbuf_j[imdex] + rend-rstart + 1;
2350     vals = rbuf_a[imdex];
2351     for (i=0; i<rend-rstart; i++){
2352       row   = i + rstart;
2353       ncols = rptr[i+1] - rptr[i];
2354       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2355       vals += ncols;
2356       cols += ncols;
2357     }
2358   }
2359   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2361   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2362   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2363   if (reuse == MAT_INITIAL_MATRIX){
2364     PetscContainer container;
2365     *matredundant = C;
2366     /* create a supporting struct and attach it to C for reuse */
2367     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2368     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2369     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2370     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2371     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2372 
2373     redund->nzlocal = nzlocal;
2374     redund->nsends  = nsends;
2375     redund->nrecvs  = nrecvs;
2376     redund->send_rank = send_rank;
2377     redund->recv_rank = recv_rank;
2378     redund->sbuf_nz = sbuf_nz;
2379     redund->rbuf_nz = rbuf_nz;
2380     redund->sbuf_j  = sbuf_j;
2381     redund->sbuf_a  = sbuf_a;
2382     redund->rbuf_j  = rbuf_j;
2383     redund->rbuf_a  = rbuf_a;
2384 
2385     redund->MatDestroy = C->ops->destroy;
2386     C->ops->destroy    = MatDestroy_MatRedundant;
2387   }
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
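     /*
        The row-wise maximum in absolute value is the larger of the maxima over
        the diagonal block A and the off-diagonal block B; local column indices
        are mapped to global ones with cmap->rstart for the A part and garray[]
        for the B part.
     */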
2393 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2394 {
2395   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2396   PetscErrorCode ierr;
2397   PetscInt       i,*idxb = 0;
2398   PetscScalar    *va,*vb;
2399   Vec            vtmp;
2400 
2401   PetscFunctionBegin;
2402   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2403   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2404   if (idx) {
2405     for (i=0; i<A->rmap->n; i++) {
2406       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2407     }
2408   }
2409 
2410   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2411   if (idx) {
2412     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2413   }
2414   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2415   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2416 
2417   for (i=0; i<A->rmap->n; i++){
2418     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2419       va[i] = vb[i];
2420       if (idx) idx[i] = a->garray[idxb[i]];
2421     }
2422   }
2423 
2424   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2425   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2426   if (idxb) {
2427     ierr = PetscFree(idxb);CHKERRQ(ierr);
2428   }
2429   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 #undef __FUNCT__
2434 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2435 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2436 {
2437   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2438   PetscErrorCode ierr;
2439   PetscInt       i,*idxb = 0;
2440   PetscScalar    *va,*vb;
2441   Vec            vtmp;
2442 
2443   PetscFunctionBegin;
2444   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2445   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2446   if (idx) {
2447     for (i=0; i<A->rmap->n; i++) {
2448       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2449     }
2450   }
2451 
2452   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2453   if (idx) {
2454     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2455   }
2456   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2457   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2458 
2459   for (i=0; i<A->rmap->n; i++){
2460     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2461       va[i] = vb[i];
2462       if (idx) idx[i] = a->garray[idxb[i]];
2463     }
2464   }
2465 
2466   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2467   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2468   if (idxb) {
2469     ierr = PetscFree(idxb);CHKERRQ(ierr);
2470   }
2471   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 #undef __FUNCT__
2476 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2477 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2478 {
2479   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2480   PetscInt       n      = A->rmap->n;
2481   PetscInt       cstart = A->cmap->rstart;
2482   PetscInt      *cmap   = mat->garray;
2483   PetscInt      *diagIdx, *offdiagIdx;
2484   Vec            diagV, offdiagV;
2485   PetscScalar   *a, *diagA, *offdiagA;
2486   PetscInt       r;
2487   PetscErrorCode ierr;
2488 
2489   PetscFunctionBegin;
2490   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2491   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2492   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2493   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2494   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2495   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2496   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2497   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2498   for(r = 0; r < n; ++r) {
2499     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2500       a[r]   = diagA[r];
2501       idx[r] = cstart + diagIdx[r];
2502     } else {
2503       a[r]   = offdiagA[r];
2504       idx[r] = cmap[offdiagIdx[r]];
2505     }
2506   }
2507   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2508   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2509   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2510   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2511   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2512   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 #undef __FUNCT__
2517 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2518 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2519 {
2520   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2521   PetscInt       n      = A->rmap->n;
2522   PetscInt       cstart = A->cmap->rstart;
2523   PetscInt      *cmap   = mat->garray;
2524   PetscInt      *diagIdx, *offdiagIdx;
2525   Vec            diagV, offdiagV;
2526   PetscScalar   *a, *diagA, *offdiagA;
2527   PetscInt       r;
2528   PetscErrorCode ierr;
2529 
2530   PetscFunctionBegin;
2531   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2532   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2533   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2534   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2535   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2536   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2537   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2538   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2539   for(r = 0; r < n; ++r) {
2540     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2541       a[r]   = diagA[r];
2542       idx[r] = cstart + diagIdx[r];
2543     } else {
2544       a[r]   = offdiagA[r];
2545       idx[r] = cmap[offdiagIdx[r]];
2546     }
2547   }
2548   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2549   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2550   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2551   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2552   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2553   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2559 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2560 {
2561   PetscErrorCode ierr;
2562   Mat            *dummy;
2563 
2564   PetscFunctionBegin;
2565   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2566   *newmat = *dummy;
2567   ierr = PetscFree(dummy);CHKERRQ(ierr);
2568   PetscFunctionReturn(0);
2569 }
2570 
2571 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2572 /* -------------------------------------------------------------------*/
2573 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2574        MatGetRow_MPIAIJ,
2575        MatRestoreRow_MPIAIJ,
2576        MatMult_MPIAIJ,
2577 /* 4*/ MatMultAdd_MPIAIJ,
2578        MatMultTranspose_MPIAIJ,
2579        MatMultTransposeAdd_MPIAIJ,
2580 #ifdef PETSC_HAVE_PBGL
2581        MatSolve_MPIAIJ,
2582 #else
2583        0,
2584 #endif
2585        0,
2586        0,
2587 /*10*/ 0,
2588        0,
2589        0,
2590        MatSOR_MPIAIJ,
2591        MatTranspose_MPIAIJ,
2592 /*15*/ MatGetInfo_MPIAIJ,
2593        MatEqual_MPIAIJ,
2594        MatGetDiagonal_MPIAIJ,
2595        MatDiagonalScale_MPIAIJ,
2596        MatNorm_MPIAIJ,
2597 /*20*/ MatAssemblyBegin_MPIAIJ,
2598        MatAssemblyEnd_MPIAIJ,
2599        MatSetOption_MPIAIJ,
2600        MatZeroEntries_MPIAIJ,
2601 /*24*/ MatZeroRows_MPIAIJ,
2602        0,
2603 #ifdef PETSC_HAVE_PBGL
2604        0,
2605 #else
2606        0,
2607 #endif
2608        0,
2609        0,
2610 /*29*/ MatSetUpPreallocation_MPIAIJ,
2611 #ifdef PETSC_HAVE_PBGL
2612        0,
2613 #else
2614        0,
2615 #endif
2616        0,
2617        0,
2618        0,
2619 /*34*/ MatDuplicate_MPIAIJ,
2620        0,
2621        0,
2622        0,
2623        0,
2624 /*39*/ MatAXPY_MPIAIJ,
2625        MatGetSubMatrices_MPIAIJ,
2626        MatIncreaseOverlap_MPIAIJ,
2627        MatGetValues_MPIAIJ,
2628        MatCopy_MPIAIJ,
2629 /*44*/ MatGetRowMax_MPIAIJ,
2630        MatScale_MPIAIJ,
2631        0,
2632        0,
2633        0,
2634 /*49*/ MatSetBlockSize_MPIAIJ,
2635        0,
2636        0,
2637        0,
2638        0,
2639 /*54*/ MatFDColoringCreate_MPIAIJ,
2640        0,
2641        MatSetUnfactored_MPIAIJ,
2642        MatPermute_MPIAIJ,
2643        0,
2644 /*59*/ MatGetSubMatrix_MPIAIJ,
2645        MatDestroy_MPIAIJ,
2646        MatView_MPIAIJ,
2647        0,
2648        0,
2649 /*64*/ 0,
2650        0,
2651        0,
2652        0,
2653        0,
2654 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2655        MatGetRowMinAbs_MPIAIJ,
2656        0,
2657        MatSetColoring_MPIAIJ,
2658 #if defined(PETSC_HAVE_ADIC)
2659        MatSetValuesAdic_MPIAIJ,
2660 #else
2661        0,
2662 #endif
2663        MatSetValuesAdifor_MPIAIJ,
2664 /*75*/ MatFDColoringApply_AIJ,
2665        0,
2666        0,
2667        0,
2668        0,
2669 /*80*/ 0,
2670        0,
2671        0,
2672 /*83*/ 0,
2673        0,
2674        0,
2675        0,
2676        0,
2677        0,
2678 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2679        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2680        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2681        MatPtAP_Basic,
2682        MatPtAPSymbolic_MPIAIJ,
2683 /*94*/ MatPtAPNumeric_MPIAIJ,
2684        0,
2685        0,
2686        0,
2687        0,
2688 /*99*/ 0,
2689        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2690        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2691        MatConjugate_MPIAIJ,
2692        0,
2693 /*104*/MatSetValuesRow_MPIAIJ,
2694        MatRealPart_MPIAIJ,
2695        MatImaginaryPart_MPIAIJ,
2696        0,
2697        0,
2698 /*109*/0,
2699        MatGetRedundantMatrix_MPIAIJ,
2700        MatGetRowMin_MPIAIJ,
2701        0,
2702        0,
2703 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2704        0,
2705        0,
2706        0,
2707        0,
2708 /*119*/0,
2709        0,
2710        0,
2711        0,
2712        MatLoadnew_MPIAIJ
2713 };
2714 
2715 /* ----------------------------------------------------------------------------------------*/
2716 
2717 EXTERN_C_BEGIN
2718 #undef __FUNCT__
2719 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2720 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2721 {
2722   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2723   PetscErrorCode ierr;
2724 
2725   PetscFunctionBegin;
2726   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2727   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2728   PetscFunctionReturn(0);
2729 }
2730 EXTERN_C_END
2731 
2732 EXTERN_C_BEGIN
2733 #undef __FUNCT__
2734 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2735 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2736 {
2737   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2738   PetscErrorCode ierr;
2739 
2740   PetscFunctionBegin;
2741   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2742   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2743   PetscFunctionReturn(0);
2744 }
2745 EXTERN_C_END
2746 
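/*
   Usage sketch (added commentary, not part of the original source): the two
   routines above are normally reached through the type-independent
   MatStoreValues()/MatRetrieveValues() interface, after the nonzero pattern
   has been frozen:

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);      saves a copy of the numerical values
     ... modify values with MatSetValues() and reassemble ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);   restores the saved values
*/
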
2747 #include "petscpc.h"
2748 EXTERN_C_BEGIN
2749 #undef __FUNCT__
2750 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2751 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2752 {
2753   Mat_MPIAIJ     *b;
2754   PetscErrorCode ierr;
2755   PetscInt       i;
2756 
2757   PetscFunctionBegin;
2758   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2759   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2760   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2761   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2762 
2763   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2764   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2765   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2766   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2767   if (d_nnz) {
2768     for (i=0; i<B->rmap->n; i++) {
2769       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2770     }
2771   }
2772   if (o_nnz) {
2773     for (i=0; i<B->rmap->n; i++) {
2774       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2775     }
2776   }
2777   b = (Mat_MPIAIJ*)B->data;
2778 
2779   if (!B->preallocated) {
2780     /* Explicitly create 2 MATSEQAIJ matrices. */
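    /*
       Added commentary (not in the original source): b->A holds the
       "diagonal" block, an m x n sequential matrix addressed with local
       column numbers, while b->B holds the "off-diagonal" block. b->B is
       created below with the full global column width N; during assembly
       its columns are compressed and the local-to-global column map is
       recorded in garray.
    */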
2781     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2782     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2783     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2784     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2785     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2786     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2787     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2788     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2789   }
2790 
2791   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2792   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2793   B->preallocated = PETSC_TRUE;
2794   PetscFunctionReturn(0);
2795 }
2796 EXTERN_C_END
2797 
2798 #undef __FUNCT__
2799 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2800 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2801 {
2802   Mat            mat;
2803   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2804   PetscErrorCode ierr;
2805 
2806   PetscFunctionBegin;
2807   *newmat       = 0;
2808   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2809   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2810   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2811   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2812   a    = (Mat_MPIAIJ*)mat->data;
2813 
2814   mat->factortype   = matin->factortype;
2815   mat->rmap->bs     = matin->rmap->bs;
2816   mat->assembled    = PETSC_TRUE;
2817   mat->insertmode   = NOT_SET_VALUES;
2818   mat->preallocated = PETSC_TRUE;
2819 
2820   a->size           = oldmat->size;
2821   a->rank           = oldmat->rank;
2822   a->donotstash     = oldmat->donotstash;
2823   a->roworiented    = oldmat->roworiented;
2824   a->rowindices     = 0;
2825   a->rowvalues      = 0;
2826   a->getrowactive   = PETSC_FALSE;
2827 
2828   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2829   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2830 
2831   if (oldmat->colmap) {
2832 #if defined (PETSC_USE_CTABLE)
2833     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2834 #else
2835     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2836     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2837     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2838 #endif
2839   } else a->colmap = 0;
2840   if (oldmat->garray) {
2841     PetscInt len;
2842     len  = oldmat->B->cmap->n;
2843     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2844     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2845     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2846   } else a->garray = 0;
2847 
2848   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2849   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2850   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2851   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2852   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2853   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2854   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2855   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2856   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2857   *newmat = mat;
2858   PetscFunctionReturn(0);
2859 }
2860 
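/*
   Illustrative call (added, hedged): duplicating an assembled MPIAIJ matrix
   copies the layout, colmap, garray, lvec/Mvctx and both sequential blocks:

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
*/
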
2861 #undef __FUNCT__
2862 #define __FUNCT__ "MatLoad_MPIAIJ"
2863 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2864 {
2865   Mat            A;
2866   PetscScalar    *vals,*svals;
2867   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2868   MPI_Status     status;
2869   PetscErrorCode ierr;
2870   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2871   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2872   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2873   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2874   PetscInt       cend,cstart,n,*rowners;
2875   int            fd;
2876 
2877   PetscFunctionBegin;
2878   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2879   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2880   if (!rank) {
2881     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2882     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2883     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2884   }
2885 
2886   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2887   M = header[1]; N = header[2];
2888   /* determine ownership of all rows */
2889   m    = M/size + ((M % size) > rank);
2890   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2891   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2892 
2893   /* First process needs enough room for process with most rows */
2894   if (!rank) {
2895     mmax       = rowners[1];
2896     for (i=2; i<size; i++) {
2897       mmax = PetscMax(mmax,rowners[i]);
2898     }
2899   } else mmax = m;
2900 
2901   rowners[0] = 0;
2902   for (i=2; i<=size; i++) {
2903     rowners[i] += rowners[i-1];
2904   }
2905   rstart = rowners[rank];
2906   rend   = rowners[rank+1];
2907 
2908   /* distribute row lengths to all processors */
2909   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2910   if (!rank) {
2911     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2912     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2913     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2914     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2915     for (j=0; j<m; j++) {
2916       procsnz[0] += ourlens[j];
2917     }
2918     for (i=1; i<size; i++) {
2919       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2920       /* calculate the number of nonzeros on each processor */
2921       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2922         procsnz[i] += rowlengths[j];
2923       }
2924       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2925       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2926     }
2927     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2928   } else {
2929     mpicnt = PetscMPIIntCast(m);
2930     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2931   }
2932 
2933   if (!rank) {
2934     /* determine max buffer needed and allocate it */
2935     maxnz = 0;
2936     for (i=0; i<size; i++) {
2937       maxnz = PetscMax(maxnz,procsnz[i]);
2938     }
2939     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2940 
2941     /* read in my part of the matrix column indices  */
2942     nz   = procsnz[0];
2943     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2944     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2945 
2946     /* read in everyone else's part and ship it off */
2947     for (i=1; i<size; i++) {
2948       nz     = procsnz[i];
2949       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2950       mpicnt = PetscMPIIntCast(nz);
2951       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2952     }
2953     ierr = PetscFree(cols);CHKERRQ(ierr);
2954   } else {
2955     /* determine buffer space needed for message */
2956     nz = 0;
2957     for (i=0; i<m; i++) {
2958       nz += ourlens[i];
2959     }
2960     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2961 
2962     /* receive message of column indices */
2963     mpicnt = PetscMPIIntCast(nz);
2964     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2965     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2966     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
2967     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
2968     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
2969   }
2970 
2971   /* determine column ownership if matrix is not square */
2972   if (N != M) {
2973     n      = N/size + ((N % size) > rank);
2974     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2975     cstart = cend - n;
2976   } else {
2977     cstart = rstart;
2978     cend   = rend;
2979     n      = cend - cstart;
2980   }
2981 
2982   /* loop over local rows, determining number of off diagonal entries */
2983   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2984   jj = 0;
2985   for (i=0; i<m; i++) {
2986     for (j=0; j<ourlens[i]; j++) {
2987       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2988       jj++;
2989     }
2990   }
2991 
2992   /* create our matrix */
2993   for (i=0; i<m; i++) {
2994     ourlens[i] -= offlens[i];
2995   }
2996   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2997   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2998   ierr = MatSetType(A,type);CHKERRQ(ierr);
2999   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3000 
3001   for (i=0; i<m; i++) {
3002     ourlens[i] += offlens[i];
3003   }
3004 
3005   if (!rank) {
3006     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3007 
3008     /* read in my part of the matrix numerical values  */
3009     nz   = procsnz[0];
3010     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3011 
3012     /* insert into matrix */
3013     jj      = rstart;
3014     smycols = mycols;
3015     svals   = vals;
3016     for (i=0; i<m; i++) {
3017       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3018       smycols += ourlens[i];
3019       svals   += ourlens[i];
3020       jj++;
3021     }
3022 
3023     /* read in other processors and ship out */
3024     for (i=1; i<size; i++) {
3025       nz     = procsnz[i];
3026       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3027       mpicnt = PetscMPIIntCast(nz);
3028       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3029     }
3030     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3031   } else {
3032     /* receive numeric values */
3033     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3034 
3035     /* receive message of values */
3036     mpicnt = PetscMPIIntCast(nz);
3037     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3038     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3039     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
3040     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
3041     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
3042 
3043     /* insert into matrix */
3044     jj      = rstart;
3045     smycols = mycols;
3046     svals   = vals;
3047     for (i=0; i<m; i++) {
3048       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3049       smycols += ourlens[i];
3050       svals   += ourlens[i];
3051       jj++;
3052     }
3053   }
3054   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3055   ierr = PetscFree(vals);CHKERRQ(ierr);
3056   ierr = PetscFree(mycols);CHKERRQ(ierr);
3057   ierr = PetscFree(rowners);CHKERRQ(ierr);
3058 
3059   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3060   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3061   *newmat = A;
3062   PetscFunctionReturn(0);
3063 }
3064 
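/*
   Example (added, illustrative; "matrix.dat" is a placeholder name): loading
   a matrix written with MatView() through the generic interface of this
   PETSc version, which dispatches to MatLoad_MPIAIJ() above:

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/
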
3065 #undef __FUNCT__
3066 #define __FUNCT__ "MatLoadnew_MPIAIJ"
3067 PetscErrorCode MatLoadnew_MPIAIJ(PetscViewer viewer, Mat newMat)
3068 {
3069   PetscScalar    *vals,*svals;
3070   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3071   MPI_Status     status;
3072   PetscErrorCode ierr;
3073   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
3074   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3075   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3076   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3077   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3078   int            fd;
3079 
3080   PetscFunctionBegin;
3081   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3082   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3083   if (!rank) {
3084     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3085     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3086     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3087   }
3088 
3089   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3090 
3091   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3092   M = header[1]; N = header[2];
3093   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
3094   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3095   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3096 
3097   /* If global sizes are set, check if they are consistent with that given in the file */
3098   if (sizesset) {
3099     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3100   }
3101   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3102   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3103 
3104   /* determine ownership of all rows */
3105   if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3106   else m = newMat->rmap->n; /* Set by user */
3107 
3108   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3109   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3110 
3111   /* First process needs enough room for process with most rows */
3112   if (!rank) {
3113     mmax       = rowners[1];
3114     for (i=2; i<size; i++) {
3115       mmax = PetscMax(mmax,rowners[i]);
3116     }
3117   } else mmax = m;
3118 
3119   rowners[0] = 0;
3120   for (i=2; i<=size; i++) {
3121     rowners[i] += rowners[i-1];
3122   }
3123   rstart = rowners[rank];
3124   rend   = rowners[rank+1];
3125 
3126   /* distribute row lengths to all processors */
3127   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3128   if (!rank) {
3129     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3130     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3131     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3132     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3133     for (j=0; j<m; j++) {
3134       procsnz[0] += ourlens[j];
3135     }
3136     for (i=1; i<size; i++) {
3137       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3138       /* calculate the number of nonzeros on each processor */
3139       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3140         procsnz[i] += rowlengths[j];
3141       }
3142       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
3143       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3144     }
3145     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3146   } else {
3147     mpicnt = PetscMPIIntCast(m);
3148     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3149   }
3150 
3151   if (!rank) {
3152     /* determine max buffer needed and allocate it */
3153     maxnz = 0;
3154     for (i=0; i<size; i++) {
3155       maxnz = PetscMax(maxnz,procsnz[i]);
3156     }
3157     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3158 
3159     /* read in my part of the matrix column indices  */
3160     nz   = procsnz[0];
3161     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3162     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3163 
3164     /* read in everyone else's part and ship it off */
3165     for (i=1; i<size; i++) {
3166       nz     = procsnz[i];
3167       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3168       mpicnt = PetscMPIIntCast(nz);
3169       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3170     }
3171     ierr = PetscFree(cols);CHKERRQ(ierr);
3172   } else {
3173     /* determine buffer space needed for message */
3174     nz = 0;
3175     for (i=0; i<m; i++) {
3176       nz += ourlens[i];
3177     }
3178     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3179 
3180     /* receive message of column indices */
3181     mpicnt = PetscMPIIntCast(nz);
3182     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3183     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
3184     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
3185     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
3186     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
3187   }
3188 
3189   /* determine column ownership if matrix is not square */
3190   if (N != M) {
3191     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3192     else n = newMat->cmap->n;
3193     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3194     cstart = cend - n;
3195   } else {
3196     cstart = rstart;
3197     cend   = rend;
3198     n      = cend - cstart;
3199   }
3200 
3201   /* loop over local rows, determining number of off diagonal entries */
3202   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3203   jj = 0;
3204   for (i=0; i<m; i++) {
3205     for (j=0; j<ourlens[i]; j++) {
3206       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3207       jj++;
3208     }
3209   }
3210 
3211   for (i=0; i<m; i++) {
3212     ourlens[i] -= offlens[i];
3213   }
3214   if (!sizesset) {
3215     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3216   }
3217   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3218 
3219   for (i=0; i<m; i++) {
3220     ourlens[i] += offlens[i];
3221   }
3222 
3223   if (!rank) {
3224     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3225 
3226     /* read in my part of the matrix numerical values  */
3227     nz   = procsnz[0];
3228     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3229 
3230     /* insert into matrix */
3231     jj      = rstart;
3232     smycols = mycols;
3233     svals   = vals;
3234     for (i=0; i<m; i++) {
3235       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3236       smycols += ourlens[i];
3237       svals   += ourlens[i];
3238       jj++;
3239     }
3240 
3241     /* read in other processors and ship out */
3242     for (i=1; i<size; i++) {
3243       nz     = procsnz[i];
3244       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3245       mpicnt = PetscMPIIntCast(nz);
3246       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3247     }
3248     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3249   } else {
3250     /* receive numeric values */
3251     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3252 
3253     /* receive message of values */
3254     mpicnt = PetscMPIIntCast(nz);
3255     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm,&status);CHKERRQ(ierr);
3256     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3257     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
3258     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
3259     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
3260 
3261     /* insert into matrix */
3262     jj      = rstart;
3263     smycols = mycols;
3264     svals   = vals;
3265     for (i=0; i<m; i++) {
3266       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3267       smycols += ourlens[i];
3268       svals   += ourlens[i];
3269       jj++;
3270     }
3271   }
3272   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3273   ierr = PetscFree(vals);CHKERRQ(ierr);
3274   ierr = PetscFree(mycols);CHKERRQ(ierr);
3275   ierr = PetscFree(rowners);CHKERRQ(ierr);
3276 
3277   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3278   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3279   PetscFunctionReturn(0);
3280 }
3281 
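/*
   Sketch (added commentary): unlike MatLoad_MPIAIJ() above, this new-style
   routine fills in an existing Mat, so the caller may fix the sizes first
   and have them checked against the file header (M and N are placeholders):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);     optional
     ierr = MatLoadnew_MPIAIJ(viewer,A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/
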
3282 #undef __FUNCT__
3283 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3284 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3285 {
3286   PetscErrorCode ierr;
3287   IS             iscol_local;
3288   PetscInt       csize;
3289 
3290   PetscFunctionBegin;
3291   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3292   if (call == MAT_REUSE_MATRIX) {
3293     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3294     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3295   } else {
3296     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3297   }
3298   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3299   if (call == MAT_INITIAL_MATRIX) {
3300     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3301     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3302   }
3303   PetscFunctionReturn(0);
3304 }
3305 
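/*
   Usage sketch (added): the "ISAllGather" IS composed on the submatrix above
   is what enables MAT_REUSE_MATRIX; the same index sets must describe the
   same layout on the second call:

     Mat sub;
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... A changes numerically but keeps its nonzero structure ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/
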
3306 #undef __FUNCT__
3307 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3308 /*
3309     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
3310   then the final result formed by concatenating the local matrices.
3311   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3312 
3313   Note: This requires a sequential iscol with all indices.
3314 */
3315 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3316 {
3317   PetscErrorCode ierr;
3318   PetscMPIInt    rank,size;
3319   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3320   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3321   Mat            *local,M,Mreuse;
3322   MatScalar      *vwork,*aa;
3323   MPI_Comm       comm = ((PetscObject)mat)->comm;
3324   Mat_SeqAIJ     *aij;
3325 
3326 
3327   PetscFunctionBegin;
3328   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3329   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3330 
3331   if (call ==  MAT_REUSE_MATRIX) {
3332     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3333     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3334     local = &Mreuse;
3335     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3336   } else {
3337     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3338     Mreuse = *local;
3339     ierr   = PetscFree(local);CHKERRQ(ierr);
3340   }
3341 
3342   /*
3343       m - number of local rows
3344       n - number of columns (same on all processors)
3345       rstart - first row in new global matrix generated
3346   */
3347   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3348   if (call == MAT_INITIAL_MATRIX) {
3349     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3350     ii  = aij->i;
3351     jj  = aij->j;
3352 
3353     /*
3354         Determine the number of non-zeros in the diagonal and off-diagonal
3355         portions of the matrix in order to do correct preallocation
3356     */
3357 
3358     /* first get start and end of "diagonal" columns */
3359     if (csize == PETSC_DECIDE) {
3360       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3361       if (mglobal == n) { /* square matrix */
3362 	nlocal = m;
3363       } else {
3364         nlocal = n/size + ((n % size) > rank);
3365       }
3366     } else {
3367       nlocal = csize;
3368     }
3369     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3370     rstart = rend - nlocal;
3371     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3372 
3373     /* next, compute all the lengths */
3374     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3375     olens = dlens + m;
3376     for (i=0; i<m; i++) {
3377       jend = ii[i+1] - ii[i];
3378       olen = 0;
3379       dlen = 0;
3380       for (j=0; j<jend; j++) {
3381         if (*jj < rstart || *jj >= rend) olen++;
3382         else dlen++;
3383         jj++;
3384       }
3385       olens[i] = olen;
3386       dlens[i] = dlen;
3387     }
3388     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3389     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3390     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3391     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3392     ierr = PetscFree(dlens);CHKERRQ(ierr);
3393   } else {
3394     PetscInt ml,nl;
3395 
3396     M = *newmat;
3397     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3398     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3399     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3400     /*
3401          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3402        rather than the slower MatSetValues().
3403     */
3404     M->was_assembled = PETSC_TRUE;
3405     M->assembled     = PETSC_FALSE;
3406   }
3407   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3408   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3409   ii  = aij->i;
3410   jj  = aij->j;
3411   aa  = aij->a;
3412   for (i=0; i<m; i++) {
3413     row   = rstart + i;
3414     nz    = ii[i+1] - ii[i];
3415     cwork = jj;     jj += nz;
3416     vwork = aa;     aa += nz;
3417     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3418   }
3419 
3420   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3421   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3422   *newmat = M;
3423 
3424   /* save submatrix used in processor for next request */
3425   if (call ==  MAT_INITIAL_MATRIX) {
3426     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3427     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3428   }
3429 
3430   PetscFunctionReturn(0);
3431 }
3432 
3433 EXTERN_C_BEGIN
3434 #undef __FUNCT__
3435 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3436 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3437 {
3438   PetscInt       m,cstart, cend,j,nnz,i,d;
3439   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3440   const PetscInt *JJ;
3441   PetscScalar    *values;
3442   PetscErrorCode ierr;
3443 
3444   PetscFunctionBegin;
3445   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3446 
3447   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3448   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3449   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3450   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3451   m      = B->rmap->n;
3452   cstart = B->cmap->rstart;
3453   cend   = B->cmap->rend;
3454   rstart = B->rmap->rstart;
3455 
3456   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3457 
3458 #if defined(PETSC_USE_DEBUG)
3459   for (i=0; i<m; i++) {
3460     nnz     = Ii[i+1]- Ii[i];
3461     JJ      = J + Ii[i];
3462     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3463     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3464     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3465   }
3466 #endif
3467 
3468   for (i=0; i<m; i++) {
3469     nnz     = Ii[i+1]- Ii[i];
3470     JJ      = J + Ii[i];
3471     nnz_max = PetscMax(nnz_max,nnz);
3472     d       = 0;
3473     for (j=0; j<nnz; j++) {
3474       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3475     }
3476     d_nnz[i] = d;
3477     o_nnz[i] = nnz - d;
3478   }
3479   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3480   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3481 
3482   if (v) values = (PetscScalar*)v;
3483   else {
3484     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3485     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3486   }
3487 
3488   for (i=0; i<m; i++) {
3489     ii   = i + rstart;
3490     nnz  = Ii[i+1]- Ii[i];
3491     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3492   }
3493   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3494   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3495 
3496   if (!v) {
3497     ierr = PetscFree(values);CHKERRQ(ierr);
3498   }
3499   PetscFunctionReturn(0);
3500 }
3501 EXTERN_C_END
3502 
3503 #undef __FUNCT__
3504 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3505 /*@
3506    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3507    (the default parallel PETSc format).
3508 
3509    Collective on MPI_Comm
3510 
3511    Input Parameters:
3512 +  B - the matrix
3513 .  i - the indices into j for the start of each local row (starts with zero)
3514 .  j - the column indices for each local row (starts with zero)
3515 -  v - optional values in the matrix
3516 
3517    Level: developer
3518 
3519    Notes:
3520        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3521      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3522      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3523 
3524        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3525 
3526        The format which is used for the sparse matrix input is equivalent to a
3527     row-major ordering, i.e., for the following matrix, the input data expected is
3528     as shown:
3529 
3530         1 0 0
3531         2 0 3     P0
3532        -------
3533         4 5 6     P1
3534 
3535      Process0 [P0]: rows_owned=[0,1]
3536         i =  {0,1,3}  [size = nrow+1  = 2+1]
3537         j =  {0,0,2}  [size = nz = 3]
3538         v =  {1,2,3}  [size = nz = 3]
3539 
3540      Process1 [P1]: rows_owned=[2]
3541         i =  {0,3}    [size = nrow+1  = 1+1]
3542         j =  {0,1,2}  [size = nz = 3]
3543         v =  {4,5,6}  [size = nz = 3]
3544 
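   Example call sequence (an illustrative sketch; comm, m, n, M, N as in
   MatCreate()/MatSetSizes()):

.vb
   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
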
3545 .keywords: matrix, aij, compressed row, sparse, parallel
3546 
3547 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3548           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3549 @*/
3550 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3551 {
3552   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3553 
3554   PetscFunctionBegin;
3555   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3556   if (f) {
3557     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3558   }
3559   PetscFunctionReturn(0);
3560 }
3561 
3562 #undef __FUNCT__
3563 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3564 /*@C
3565    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3566    (the default parallel PETSc format).  For good matrix assembly performance
3567    the user should preallocate the matrix storage by setting the parameters
3568    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3569    performance can be increased by more than a factor of 50.
3570 
3571    Collective on MPI_Comm
3572 
3573    Input Parameters:
3574 +  A - the matrix
3575 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3576            (same value is used for all local rows)
3577 .  d_nnz - array containing the number of nonzeros in the various rows of the
3578            DIAGONAL portion of the local submatrix (possibly different for each row)
3579            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3580            The size of this array is equal to the number of local rows, i.e 'm'.
3581            You must leave room for the diagonal entry even if it is zero.
3582 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3583            submatrix (same value is used for all local rows).
3584 -  o_nnz - array containing the number of nonzeros in the various rows of the
3585            OFF-DIAGONAL portion of the local submatrix (possibly different for
3586            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3587            structure. The size of this array is equal to the number
3588            of local rows, i.e 'm'.
3589 
3590    If the *_nnz parameter is given then the *_nz parameter is ignored
3591 
3592    The AIJ format (also called the Yale sparse matrix format or
3593    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3594    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3595 
3596    The parallel matrix is partitioned such that the first m0 rows belong to
3597    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3598    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3599 
3600    The DIAGONAL portion of the local submatrix of a processor can be defined
3601    as the submatrix which is obtained by extracting the part corresponding
3602    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3603    first row that belongs to the processor, and r2 is the last row belonging
3604    to this processor. This is a square mxm matrix. The remaining portion
3605    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3606 
3607    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3608 
3609    You can call MatGetInfo() to get information on how effective the preallocation was;
3610    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3611    You can also run with the option -info and look for messages with the string
3612    malloc in them to see if additional memory allocation was needed.
3613 
3614    Example usage:
3615 
3616    Consider the following 8x8 matrix with 34 non-zero values that is
3617    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3618    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3619    as follows:
3620 
3621 .vb
3622             1  2  0  |  0  3  0  |  0  4
3623     Proc0   0  5  6  |  7  0  0  |  8  0
3624             9  0 10  | 11  0  0  | 12  0
3625     -------------------------------------
3626            13  0 14  | 15 16 17  |  0  0
3627     Proc1   0 18  0  | 19 20 21  |  0  0
3628             0  0  0  | 22 23  0  | 24  0
3629     -------------------------------------
3630     Proc2  25 26 27  |  0  0 28  | 29  0
3631            30  0  0  | 31 32 33  |  0 34
3632 .ve
3633 
3634    This can be represented as a collection of submatrices as:
3635 
3636 .vb
3637       A B C
3638       D E F
3639       G H I
3640 .ve
3641 
3642    Where the submatrices A,B,C are owned by proc0, D,E,F are
3643    owned by proc1, G,H,I are owned by proc2.
3644 
3645    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3646    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3647    The 'M','N' parameters are 8,8, and have the same values on all procs.
3648 
3649    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3650    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3651    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3652    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3653    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3654    matrix, and [DF] as another SeqAIJ matrix.
3655 
3656    When d_nz, o_nz parameters are specified, d_nz storage elements are
3657    allocated for every row of the local diagonal submatrix, and o_nz
3658    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3659    One way to choose d_nz and o_nz is to use the max nonzeros per local
3660    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3661    In this case, the values of d_nz,o_nz are:
3662 .vb
3663      proc0 : d_nz = 2, o_nz = 2
3664      proc1 : d_nz = 3, o_nz = 2
3665      proc2 : d_nz = 1, o_nz = 4
3666 .ve
3667    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3668    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3669    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3670    34 values.
3671 
3672    When d_nnz, o_nnz parameters are specified, the storage is specified
3673    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3674    In the above case the values for d_nnz,o_nnz are:
3675 .vb
3676      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3677      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3678      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3679 .ve
3680    Here the space allocated is the sum of all the above values, i.e., 34, and
3681    hence the preallocation is perfect.
3682 
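   For instance (an illustrative sketch), proc1 in the example above could
   preallocate its three rows exactly with:

.vb
   PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
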
3683    Level: intermediate
3684 
3685 .keywords: matrix, aij, compressed row, sparse, parallel
3686 
3687 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3688           MPIAIJ, MatGetInfo()
3689 @*/
3690 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3691 {
3692   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3693 
3694   PetscFunctionBegin;
3695   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3696   if (f) {
3697     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3698   }
3699   PetscFunctionReturn(0);
3700 }
3701 
3702 #undef __FUNCT__
3703 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3704 /*@
3705      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3706          CSR format the local rows.
3707 
3708    Collective on MPI_Comm
3709 
3710    Input Parameters:
3711 +  comm - MPI communicator
3712 .  m - number of local rows (Cannot be PETSC_DECIDE)
3713 .  n - This value should be the same as the local size used in creating the
3714        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3715        calculated if N is given) For square matrices n is almost always m.
3716 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3717 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3718 .   i - row indices
3719 .   j - column indices
3720 -   a - matrix values
3721 
3722    Output Parameter:
3723 .   mat - the matrix
3724 
3725    Level: intermediate
3726 
3727    Notes:
3728        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3729      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3730      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3731 
3732        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3733 
3734        The format which is used for the sparse matrix input is equivalent to a
3735     row-major ordering, i.e., for the following matrix, the input data expected is
3736     as shown:
3737 
3738         1 0 0
3739         2 0 3     P0
3740        -------
3741         4 5 6     P1
3742 
3743      Process0 [P0]: rows_owned=[0,1]
3744         i =  {0,1,3}  [size = nrow+1  = 2+1]
3745         j =  {0,0,2}  [size = nz = 3]
3746         v =  {1,2,3}  [size = nz = 3]
3747 
3748      Process1 [P1]: rows_owned=[2]
3749         i =  {0,3}    [size = nrow+1  = 1+1]
3750         j =  {0,1,2}  [size = nz = 3]
3751         v =  {4,5,6}  [size = nz = 3]
3752 
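   An illustrative call for the layout above (nrow is 2 on P0 and 1 on P1;
   the remaining sizes are derived):

.vb
   ierr = MatCreateMPIAIJWithArrays(comm,nrow,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve
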
3753 .keywords: matrix, aij, compressed row, sparse, parallel
3754 
3755 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3756           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3757 @*/
3758 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3759 {
3760   PetscErrorCode ierr;
3761 
3762   PetscFunctionBegin;
3763   if (i[0]) {
3764     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3765   }
3766   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3767   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3768   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3769   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3770   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3771   PetscFunctionReturn(0);
3772 }
3773 
3774 #undef __FUNCT__
3775 #define __FUNCT__ "MatCreateMPIAIJ"
3776 /*@C
3777    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3778    (the default parallel PETSc format).  For good matrix assembly performance
3779    the user should preallocate the matrix storage by setting the parameters
3780    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3781    performance can be increased by more than a factor of 50.
3782 
3783    Collective on MPI_Comm
3784 
3785    Input Parameters:
3786 +  comm - MPI communicator
3787 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3788            This value should be the same as the local size used in creating the
3789            y vector for the matrix-vector product y = Ax.
3790 .  n - This value should be the same as the local size used in creating the
3791        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3792        calculated if N is given) For square matrices n is almost always m.
3793 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3794 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3795 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3796            (same value is used for all local rows)
3797 .  d_nnz - array containing the number of nonzeros in the various rows of the
3798            DIAGONAL portion of the local submatrix (possibly different for each row)
3799            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3800            The size of this array is equal to the number of local rows, i.e 'm'.
3801            You must leave room for the diagonal entry even if it is zero.
3802 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3803            submatrix (same value is used for all local rows).
3804 -  o_nnz - array containing the number of nonzeros in the various rows of the
3805            OFF-DIAGONAL portion of the local submatrix (possibly different for
3806            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3807            structure. The size of this array is equal to the number
3808            of local rows, i.e 'm'.
3809 
3810    Output Parameter:
3811 .  A - the matrix
3812 
3813    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3814    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3815    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3816 
3817    Notes:
3818    If the *_nnz parameter is given then the *_nz parameter is ignored
3819 
3820    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3821    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3822    storage requirements for this matrix.
3823 
3824    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3825    processor then it must be used on all processors that share the object for
3826    that argument.
3827 
3828    The user MUST specify either the local or global matrix dimensions
3829    (possibly both).
3830 
3831    The parallel matrix is partitioned across processors such that the
3832    first m0 rows belong to process 0, the next m1 rows belong to
3833    process 1, the next m2 rows belong to process 2 etc., where
3834    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3835    values corresponding to [m x N] submatrix.
3836 
3837    The columns are logically partitioned with the n0 columns belonging
3838    to 0th partition, the next n1 columns belonging to the next
3839    partition etc., where n0,n1,n2... are the input parameter 'n'.
3840 
3841    The DIAGONAL portion of the local submatrix on any given processor
3842    is the submatrix corresponding to the rows and columns m,n
3843    corresponding to the given processor. i.e diagonal matrix on
3844    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3845    etc. The remaining portion of the local submatrix [m x (N-n)]
3846    constitute the OFF-DIAGONAL portion. The example below better
3847    illustrates this concept.
3848 
3849    For a square global matrix we define each processor's diagonal portion
3850    to be its local rows and the corresponding columns (a square submatrix);
3851    each processor's off-diagonal portion encompasses the remainder of the
3852    local matrix (a rectangular submatrix).
3853 
3854    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3855 
3856    When calling this routine with a single process communicator, a matrix of
3857    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3858    type of communicator, use the construction mechanism:
3859      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3860 
3861    By default, this format uses inodes (identical nodes) when possible.
3862    We search for consecutive rows with the same nonzero structure, thereby
3863    reusing matrix information to achieve increased efficiency.
3864 
3865    Options Database Keys:
3866 +  -mat_no_inode  - Do not use inodes
3867 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3868 -  -mat_aij_oneindex - Internally use indexing starting at 1
3869         rather than 0.  Note that when calling MatSetValues(),
3870         the user still MUST index entries starting at 0!
3871 
3872 
3873    Example usage:
3874 
3875    Consider the following 8x8 matrix with 34 non-zero values that is
3876    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3877    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3878    as follows:
3879 
3880 .vb
3881             1  2  0  |  0  3  0  |  0  4
3882     Proc0   0  5  6  |  7  0  0  |  8  0
3883             9  0 10  | 11  0  0  | 12  0
3884     -------------------------------------
3885            13  0 14  | 15 16 17  |  0  0
3886     Proc1   0 18  0  | 19 20 21  |  0  0
3887             0  0  0  | 22 23  0  | 24  0
3888     -------------------------------------
3889     Proc2  25 26 27  |  0  0 28  | 29  0
3890            30  0  0  | 31 32 33  |  0 34
3891 .ve
3892 
3893    This can be represented as a collection of submatrices as:
3894 
3895 .vb
3896       A B C
3897       D E F
3898       G H I
3899 .ve
3900 
3901    Where the submatrices A,B,C are owned by proc0, D,E,F are
3902    owned by proc1, G,H,I are owned by proc2.
3903 
3904    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3905    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3906    The 'M','N' parameters are 8,8, and have the same values on all procs.
3907 
3908    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3909    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3910    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3911    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3912    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3913    matrix, and [DF] as another SeqAIJ matrix.
3914 
3915    When d_nz, o_nz parameters are specified, d_nz storage elements are
3916    allocated for every row of the local diagonal submatrix, and o_nz
3917    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3918    One way to choose d_nz and o_nz is to use the max nonzeros per local
3919    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3920    In this case, the values of d_nz,o_nz are:
3921 .vb
3922      proc0 : d_nz = 2, o_nz = 2
3923      proc1 : d_nz = 3, o_nz = 2
3924      proc2 : d_nz = 1, o_nz = 4
3925 .ve
3926    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3927    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3928    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3929    34 values.
3930 
3931    When d_nnz, o_nnz parameters are specified, the storage is specified
3932    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3933    In the above case the values for d_nnz,o_nnz are:
3934 .vb
3935      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3936      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3937      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3938 .ve
3939    Here the space allocated is the sum of all the above values, i.e., 34, and
3940    hence the preallocation is perfect.
3941 
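   A minimal illustrative call for the 8x8 example (each process passes its
   own m and nnz arrays; here the column split matches the row split):

.vb
   ierr = MatCreateMPIAIJ(comm,m,m,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
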
3942    Level: intermediate
3943 
3944 .keywords: matrix, aij, compressed row, sparse, parallel
3945 
3946 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3947           MPIAIJ, MatCreateMPIAIJWithArrays()
3948 @*/
3949 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3950 {
3951   PetscErrorCode ierr;
3952   PetscMPIInt    size;
3953 
3954   PetscFunctionBegin;
3955   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3956   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3957   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3958   if (size > 1) {
3959     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3960     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3961   } else {
3962     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3963     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3964   }
3965   PetscFunctionReturn(0);
3966 }
3967 
3968 #undef __FUNCT__
3969 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3970 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3971 {
3972   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3973 
3974   PetscFunctionBegin;
3975   *Ad     = a->A;
3976   *Ao     = a->B;
3977   *colmap = a->garray;
3978   PetscFunctionReturn(0);
3979 }
3980 
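/*
   Illustrative use (added): fetch the diagonal block, the off-diagonal
   block, and the map from the off-diagonal block's compressed local columns
   to global column numbers:

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
     ... colmap[k] is the global column of local column k of Ao ...
*/
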
3981 #undef __FUNCT__
3982 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3983 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3984 {
3985   PetscErrorCode ierr;
3986   PetscInt       i;
3987   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3988 
3989   PetscFunctionBegin;
3990   if (coloring->ctype == IS_COLORING_GLOBAL) {
3991     ISColoringValue *allcolors,*colors;
3992     ISColoring      ocoloring;
3993 
3994     /* set coloring for diagonal portion */
3995     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3996 
3997     /* set coloring for off-diagonal portion */
3998     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3999     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4000     for (i=0; i<a->B->cmap->n; i++) {
4001       colors[i] = allcolors[a->garray[i]];
4002     }
4003     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4004     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4005     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4006     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4007   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4008     ISColoringValue *colors;
4009     PetscInt        *larray;
4010     ISColoring      ocoloring;
4011 
4012     /* set coloring for diagonal portion */
4013     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4014     for (i=0; i<a->A->cmap->n; i++) {
4015       larray[i] = i + A->cmap->rstart;
4016     }
4017     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4018     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4019     for (i=0; i<a->A->cmap->n; i++) {
4020       colors[i] = coloring->colors[larray[i]];
4021     }
4022     ierr = PetscFree(larray);CHKERRQ(ierr);
4023     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4024     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4025     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4026 
4027     /* set coloring for off-diagonal portion */
4028     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4029     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4030     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4031     for (i=0; i<a->B->cmap->n; i++) {
4032       colors[i] = coloring->colors[larray[i]];
4033     }
4034     ierr = PetscFree(larray);CHKERRQ(ierr);
4035     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4036     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4037     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4038   } else {
4039     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4040   }
4041 
4042   PetscFunctionReturn(0);
4043 }
4044 
4045 #if defined(PETSC_HAVE_ADIC)
4046 #undef __FUNCT__
4047 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4048 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4049 {
4050   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4051   PetscErrorCode ierr;
4052 
4053   PetscFunctionBegin;
4054   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4055   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4056   PetscFunctionReturn(0);
4057 }
4058 #endif
4059 
4060 #undef __FUNCT__
4061 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4062 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4063 {
4064   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4065   PetscErrorCode ierr;
4066 
4067   PetscFunctionBegin;
4068   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4069   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4070   PetscFunctionReturn(0);
4071 }
4072 
4073 #undef __FUNCT__
4074 #define __FUNCT__ "MatMerge"
4075 /*@
4076       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4077                  matrices from each processor
4078 
4079     Collective on MPI_Comm
4080 
4081    Input Parameters:
4082 +    comm - the communicator the parallel matrix will live on
4083 .    inmat - the input sequential matrix (one per process)
4084 .    n - number of local columns (or PETSC_DECIDE)
4085 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4086 
4087    Output Parameter:
4088 .    outmat - the parallel matrix generated
4089 
4090     Level: advanced
4091 
4092    Notes: The number of columns of the matrix on EACH process MUST be the same.
          The input matrix inmat is destroyed by this routine.
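
   Example usage (a sketch; seqpiece is a hypothetical variable holding the
   SeqAIJ matrix assembled by each process):
.vb
     Mat seqpiece,par;
     /* ... create and assemble seqpiece on PETSC_COMM_SELF ... */
     ierr = MatMerge(comm,seqpiece,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
     /* seqpiece has been destroyed by the call; use par */
.ve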
4093 
4094 @*/
4095 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4096 {
4097   PetscErrorCode ierr;
4098   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4099   PetscInt       *indx;
4100   PetscScalar    *values;
4101 
4102   PetscFunctionBegin;
4103   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4104   if (scall == MAT_INITIAL_MATRIX){
4105     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4106     if (n == PETSC_DECIDE){
4107       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4108     }
4109     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4110     rstart -= m;
4111 
4112     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4113     for (i=0;i<m;i++) {
4114       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4115       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4116       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4117     }
4118     /* This routine will ONLY return MPIAIJ type matrix */
4119     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4120     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4121     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4122     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4123     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4124 
4125   } else if (scall == MAT_REUSE_MATRIX){
4126     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4127   } else {
4128     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4129   }
4130 
4131   for (i=0;i<m;i++) {
4132     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4133     Ii    = i + rstart;
4134     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4135     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4136   }
4137   ierr = MatDestroy(inmat);CHKERRQ(ierr);
4138   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4139   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4140 
4141   PetscFunctionReturn(0);
4142 }
4143 
4144 #undef __FUNCT__
4145 #define __FUNCT__ "MatFileSplit"
4146 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4147 {
4148   PetscErrorCode    ierr;
4149   PetscMPIInt       rank;
4150   PetscInt          m,N,i,rstart,nnz;
4151   size_t            len;
4152   const PetscInt    *indx;
4153   PetscViewer       out;
4154   char              *name;
4155   Mat               B;
4156   const PetscScalar *values;
4157 
4158   PetscFunctionBegin;
4159   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4160   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4161   /* Should this be the type of the diagonal block of A? */
4162   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4163   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4164   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4165   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4166   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4167   for (i=0;i<m;i++) {
4168     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4169     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4170     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4171   }
4172   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4173   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4174 
4175   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4176   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4177   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4178   sprintf(name,"%s.%d",outfile,rank);
4179   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4180   ierr = PetscFree(name);CHKERRQ(ierr);
4181   ierr = MatView(B,out);CHKERRQ(ierr);
4182   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4183   ierr = MatDestroy(B);CHKERRQ(ierr);
4184   PetscFunctionReturn(0);
4185 }
4186 
4187 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4188 #undef __FUNCT__
4189 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4190 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4191 {
4192   PetscErrorCode       ierr;
4193   Mat_Merge_SeqsToMPI  *merge;
4194   PetscContainer       container;
4195 
4196   PetscFunctionBegin;
4197   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4198   if (container) {
4199     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4200     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4201     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4202     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4203     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4204     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4205     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4206     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4207     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4208     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4209     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4210     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4211     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4212     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4213 
4214     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4215     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4216     ierr = PetscFree(merge);CHKERRQ(ierr);
4217   }
4218 
4219   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4220   PetscFunctionReturn(0);
4221 }
4222 
4223 #include "../src/mat/utils/freespace.h"
4224 #include "petscbt.h"
4225 
4226 #undef __FUNCT__
4227 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4228 /*@C
4229       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4230                  matrices from each processor
4231 
4232     Collective on MPI_Comm
4233 
4234    Input Parameters:
4235 +    comm - the communicator the parallel matrix will live on
4236 .    seqmat - the input sequential matrix (one per process)
4237 .    m - number of local rows (or PETSC_DECIDE)
4238 .    n - number of local columns (or PETSC_DECIDE)
4239 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4240 
4241    Output Parameter:
4242 .    mpimat - the parallel matrix generated
4243 
4244     Level: advanced
4245 
4246    Notes:
4247      The dimensions of the sequential matrix on each process MUST be the same.
4248      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4249      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
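
     A sketch of the intended calling sequence, using the MatMerge_SeqsToMPI()
     wrapper defined below (the MAT_REUSE_MATRIX call assumes the nonzero
     pattern of seqmat is unchanged):
.vb
     Mat seqmat,mpimat;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... change the numerical values of seqmat, keeping its pattern ... */
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve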
4250 @*/
4251 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4252 {
4253   PetscErrorCode       ierr;
4254   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4255   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4256   PetscMPIInt          size,rank,taga,*len_s;
4257   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4258   PetscInt             proc,m;
4259   PetscInt             **buf_ri,**buf_rj;
4260   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4261   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4262   MPI_Request          *s_waits,*r_waits;
4263   MPI_Status           *status;
4264   MatScalar            *aa=a->a;
4265   MatScalar            **abuf_r,*ba_i;
4266   Mat_Merge_SeqsToMPI  *merge;
4267   PetscContainer       container;
4268 
4269   PetscFunctionBegin;
4270   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4271 
4272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4273   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4274 
4275   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4276   if (container) {
4277     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4278   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix does not possess the MatMergeSeqsToMPI container");
4279   bi     = merge->bi;
4280   bj     = merge->bj;
4281   buf_ri = merge->buf_ri;
4282   buf_rj = merge->buf_rj;
4283 
4284   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4285   owners = merge->rowmap->range;
4286   len_s  = merge->len_s;
4287 
4288   /* send and recv matrix values */
4289   /*-----------------------------*/
4290   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4291   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4292 
4293   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4294   for (proc=0,k=0; proc<size; proc++){
4295     if (!len_s[proc]) continue;
4296     i = owners[proc];
4297     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4298     k++;
4299   }
4300 
4301   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4302   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4303   ierr = PetscFree(status);CHKERRQ(ierr);
4304 
4305   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4306   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4307 
4308   /* insert mat values of mpimat */
4309   /*----------------------------*/
4310   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4311   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4312 
4313   for (k=0; k<merge->nrecv; k++){
4314     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4315     nrows = *(buf_ri_k[k]);
4316     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4317     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4318   }
4319 
4320   /* set values of ba */
4321   m = merge->rowmap->n;
4322   for (i=0; i<m; i++) {
4323     arow = owners[rank] + i;
4324     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4325     bnzi = bi[i+1] - bi[i];
4326     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4327 
4328     /* add local non-zero vals of this proc's seqmat into ba */
4329     anzi = ai[arow+1] - ai[arow];
4330     aj   = a->j + ai[arow];
4331     aa   = a->a + ai[arow];
4332     nextaj = 0;
4333     for (j=0; nextaj<anzi; j++){
4334       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4335         ba_i[j] += aa[nextaj++];
4336       }
4337     }
4338 
4339     /* add received vals into ba */
4340     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4341       /* i-th row */
4342       if (i == *nextrow[k]) {
4343         anzi = *(nextai[k]+1) - *nextai[k];
4344         aj   = buf_rj[k] + *(nextai[k]);
4345         aa   = abuf_r[k] + *(nextai[k]);
4346         nextaj = 0;
4347         for (j=0; nextaj<anzi; j++){
4348           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4349             ba_i[j] += aa[nextaj++];
4350           }
4351         }
4352         nextrow[k]++; nextai[k]++;
4353       }
4354     }
4355     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4356   }
4357   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4358   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4359 
4360   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4361   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4362   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4363   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4364   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4365   PetscFunctionReturn(0);
4366 }
4367 
4368 #undef __FUNCT__
4369 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4370 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4371 {
4372   PetscErrorCode       ierr;
4373   Mat                  B_mpi;
4374   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4375   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4376   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4377   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4378   PetscInt             len,proc,*dnz,*onz;
4379   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4380   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4381   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4382   MPI_Status           *status;
4383   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4384   PetscBT              lnkbt;
4385   Mat_Merge_SeqsToMPI  *merge;
4386   PetscContainer       container;
4387 
4388   PetscFunctionBegin;
4389   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4390 
4391   /* make sure it is a PETSc comm */
4392   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4393   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4394   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4395 
4396   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4397   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4398 
4399   /* determine row ownership */
4400   /*---------------------------------------------------------*/
4401   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4402   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4403   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4404   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4405   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4406   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4407   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4408 
4409   m      = merge->rowmap->n;
4410   M      = merge->rowmap->N;
4411   owners = merge->rowmap->range;
4412 
4413   /* determine the number of messages to send, their lengths */
4414   /*---------------------------------------------------------*/
4415   len_s  = merge->len_s;
4416 
4417   len = 0;  /* length of buf_si[] */
4418   merge->nsend = 0;
4419   for (proc=0; proc<size; proc++){
4420     len_si[proc] = 0;
4421     if (proc == rank){
4422       len_s[proc] = 0;
4423     } else {
4424       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4425       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4426     }
4427     if (len_s[proc]) {
4428       merge->nsend++;
4429       nrows = 0;
4430       for (i=owners[proc]; i<owners[proc+1]; i++){
4431         if (ai[i+1] > ai[i]) nrows++;
4432       }
4433       len_si[proc] = 2*(nrows+1);
4434       len += len_si[proc];
4435     }
4436   }
4437 
4438   /* determine the number and length of messages to receive for ij-structure */
4439   /*-------------------------------------------------------------------------*/
4440   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4441   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4442 
4443   /* post the Irecv of j-structure */
4444   /*-------------------------------*/
4445   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4446   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4447 
4448   /* post the Isend of j-structure */
4449   /*--------------------------------*/
4450   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4451 
4452   for (proc=0, k=0; proc<size; proc++){
4453     if (!len_s[proc]) continue;
4454     i = owners[proc];
4455     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4456     k++;
4457   }
4458 
4459   /* receives and sends of j-structure are complete */
4460   /*------------------------------------------------*/
4461   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4462   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4463 
4464   /* send and recv i-structure */
4465   /*---------------------------*/
4466   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4467   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4468 
4469   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4470   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4471   for (proc=0,k=0; proc<size; proc++){
4472     if (!len_s[proc]) continue;
4473     /* form outgoing message for i-structure:
4474          buf_si[0]:                 nrows to be sent
4475                [1:nrows]:           row index (global)
4476                [nrows+1:2*nrows+1]: i-structure index
4477     */
4478     /*-------------------------------------------*/
4479     nrows = len_si[proc]/2 - 1;
4480     buf_si_i    = buf_si + nrows+1;
4481     buf_si[0]   = nrows;
4482     buf_si_i[0] = 0;
4483     nrows = 0;
4484     for (i=owners[proc]; i<owners[proc+1]; i++){
4485       anzi = ai[i+1] - ai[i];
4486       if (anzi) {
4487         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4488         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4489         nrows++;
4490       }
4491     }
4492     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4493     k++;
4494     buf_si += len_si[proc];
4495   }
4496 
4497   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4498   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4499 
4500   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4501   for (i=0; i<merge->nrecv; i++){
4502     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4503   }
4504 
4505   ierr = PetscFree(len_si);CHKERRQ(ierr);
4506   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4507   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4508   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4509   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4510   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4511   ierr = PetscFree(status);CHKERRQ(ierr);
4512 
4513   /* compute a local seq matrix in each processor */
4514   /*----------------------------------------------*/
4515   /* allocate bi array and free space for accumulating nonzero column info */
4516   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4517   bi[0] = 0;
4518 
4519   /* create and initialize a linked list */
4520   nlnk = N+1;
4521   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4522 
4523   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4524   len  = ai[owners[rank+1]] - ai[owners[rank]];
4526   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4527   current_space = free_space;
4528 
4529   /* determine symbolic info for each local row */
4530   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4531 
4532   for (k=0; k<merge->nrecv; k++){
4533     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4534     nrows = *buf_ri_k[k];
4535     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4536     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4537   }
4538 
4539   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4540   len = 0;
4541   for (i=0;i<m;i++) {
4542     bnzi   = 0;
4543     /* add local non-zero cols of this proc's seqmat into lnk */
4544     arow   = owners[rank] + i;
4545     anzi   = ai[arow+1] - ai[arow];
4546     aj     = a->j + ai[arow];
4547     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4548     bnzi += nlnk;
4549     /* add received col data into lnk */
4550     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4551       if (i == *nextrow[k]) { /* i-th row */
4552         anzi = *(nextai[k]+1) - *nextai[k];
4553         aj   = buf_rj[k] + *nextai[k];
4554         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4555         bnzi += nlnk;
4556         nextrow[k]++; nextai[k]++;
4557       }
4558     }
4559     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4560 
4561     /* if free space is not available, make more free space */
4562     if (current_space->local_remaining<bnzi) {
4563       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4564       nspacedouble++;
4565     }
4566     /* copy data into free space, then initialize lnk */
4567     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4568     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4569 
4570     current_space->array           += bnzi;
4571     current_space->local_used      += bnzi;
4572     current_space->local_remaining -= bnzi;
4573 
4574     bi[i+1] = bi[i] + bnzi;
4575   }
4576 
4577   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4578 
4579   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4580   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4581   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4582 
4583   /* create symbolic parallel matrix B_mpi */
4584   /*---------------------------------------*/
4585   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4586   if (n==PETSC_DECIDE) {
4587     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4588   } else {
4589     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4590   }
4591   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4592   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4593   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4594 
4595   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4596   B_mpi->assembled     = PETSC_FALSE;
4597   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4598   merge->bi            = bi;
4599   merge->bj            = bj;
4600   merge->buf_ri        = buf_ri;
4601   merge->buf_rj        = buf_rj;
4602   merge->coi           = PETSC_NULL;
4603   merge->coj           = PETSC_NULL;
4604   merge->owners_co     = PETSC_NULL;
4605 
4606   /* attach the supporting struct to B_mpi for reuse */
4607   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4608   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4609   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4610   *mpimat = B_mpi;
4611 
4612   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4613   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4614   PetscFunctionReturn(0);
4615 }
4616 
4617 #undef __FUNCT__
4618 #define __FUNCT__ "MatMerge_SeqsToMPI"
4619 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4620 {
4621   PetscErrorCode   ierr;
4622 
4623   PetscFunctionBegin;
4624   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4625   if (scall == MAT_INITIAL_MATRIX){
4626     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4627   }
4628   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4629   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4630   PetscFunctionReturn(0);
4631 }
4632 
4633 #undef __FUNCT__
4634 #define __FUNCT__ "MatGetLocalMat"
4635 /*@
4636      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of a parallel AIJ matrix
4637 
4638     Not Collective
4639 
4640    Input Parameters:
4641 +    A - the matrix
4642 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4643 
4644    Output Parameter:
4645 .    A_loc - the local sequential matrix generated
4646 
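   Example usage (a sketch; the MAT_REUSE_MATRIX call assumes the nonzero
   pattern of A is unchanged and only refreshes the numerical values):
.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... the values of A change, its pattern stays fixed ... */
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
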
4647     Level: developer
4648 
4649 @*/
4650 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4651 {
4652   PetscErrorCode  ierr;
4653   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4654   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4655   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4656   MatScalar       *aa=a->a,*ba=b->a,*cam;
4657   PetscScalar     *ca;
4658   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4659   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4660 
4661   PetscFunctionBegin;
4662   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4663   if (scall == MAT_INITIAL_MATRIX){
4664     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4665     ci[0] = 0;
4666     for (i=0; i<am; i++){
4667       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4668     }
4669     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4670     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4671     k = 0;
4672     for (i=0; i<am; i++) {
4673       ncols_o = bi[i+1] - bi[i];
4674       ncols_d = ai[i+1] - ai[i];
4675       /* off-diagonal portion of A */
4676       for (jo=0; jo<ncols_o; jo++) {
4677         col = cmap[*bj];
4678         if (col >= cstart) break;
4679         cj[k]   = col; bj++;
4680         ca[k++] = *ba++;
4681       }
4682       /* diagonal portion of A */
4683       for (j=0; j<ncols_d; j++) {
4684         cj[k]   = cstart + *aj++;
4685         ca[k++] = *aa++;
4686       }
4687       /* off-diagonal portion of A */
4688       for (j=jo; j<ncols_o; j++) {
4689         cj[k]   = cmap[*bj++];
4690         ca[k++] = *ba++;
4691       }
4692     }
4693     /* put together the new matrix */
4694     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4695     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4696     /* Since these are PETSc arrays, change flags to free them as necessary. */
4697     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4698     mat->free_a  = PETSC_TRUE;
4699     mat->free_ij = PETSC_TRUE;
4700     mat->nonew   = 0;
4701   } else if (scall == MAT_REUSE_MATRIX){
4702     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4703     ci = mat->i; cj = mat->j; cam = mat->a;
4704     for (i=0; i<am; i++) {
4705       /* off-diagonal portion of A */
4706       ncols_o = bi[i+1] - bi[i];
4707       for (jo=0; jo<ncols_o; jo++) {
4708         col = cmap[*bj];
4709         if (col >= cstart) break;
4710         *cam++ = *ba++; bj++;
4711       }
4712       /* diagonal portion of A */
4713       ncols_d = ai[i+1] - ai[i];
4714       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4715       /* off-diagonal portion of A */
4716       for (j=jo; j<ncols_o; j++) {
4717         *cam++ = *ba++; bj++;
4718       }
4719     }
4720   } else {
4721     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4722   }
4723 
4724   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4725   PetscFunctionReturn(0);
4726 }
4727 
4728 #undef __FUNCT__
4729 #define __FUNCT__ "MatGetLocalMatCondensed"
4730 /*@C
4731      MatGetLocalMatCondensed - Creates a SeqAIJ matrix containing all the local rows and only the NON-ZERO columns of a parallel AIJ matrix
4732 
4733     Not Collective
4734 
4735    Input Parameters:
4736 +    A - the matrix
4737 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4738 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4739 
4740    Output Parameter:
4741 .    A_loc - the local sequential matrix generated
4742 
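   Example usage (a sketch; passing PETSC_NULL for row and col lets the
   routine build and destroy the needed index sets internally):
.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
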
4743     Level: developer
4744 
4745 @*/
4746 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4747 {
4748   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4749   PetscErrorCode    ierr;
4750   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4751   IS                isrowa,iscola;
4752   Mat               *aloc;
4753 
4754   PetscFunctionBegin;
4755   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4756   if (!row){
4757     start = A->rmap->rstart; end = A->rmap->rend;
4758     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4759   } else {
4760     isrowa = *row;
4761   }
4762   if (!col){
4763     start = A->cmap->rstart;
4764     cmap  = a->garray;
4765     nzA   = a->A->cmap->n;
4766     nzB   = a->B->cmap->n;
4767     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4768     ncols = 0;
4769     for (i=0; i<nzB; i++) {
4770       if (cmap[i] < start) idx[ncols++] = cmap[i];
4771       else break;
4772     }
4773     imark = i;
4774     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4775     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4776     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4777     ierr = PetscFree(idx);CHKERRQ(ierr);
4778   } else {
4779     iscola = *col;
4780   }
4781   if (scall != MAT_INITIAL_MATRIX){
4782     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4783     aloc[0] = *A_loc;
4784   }
4785   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4786   *A_loc = aloc[0];
4787   ierr = PetscFree(aloc);CHKERRQ(ierr);
4788   if (!row){
4789     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4790   }
4791   if (!col){
4792     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4793   }
4794   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4795   PetscFunctionReturn(0);
4796 }
4797 
4798 #undef __FUNCT__
4799 #define __FUNCT__ "MatGetBrowsOfAcols"
4800 /*@C
4801     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4802 
4803     Collective on Mat
4804 
4805    Input Parameters:
4806 +    A,B - the matrices in mpiaij format
4807 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4808 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4809 
4810    Output Parameter:
4811 +    rowb, colb - index sets of rows and columns of B to extract
4812 .    brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows
4813 -    B_seq - the sequential matrix generated
4814 
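   Example usage (a sketch; the index sets returned by the initial call must
   be passed back in for a MAT_REUSE_MATRIX call):
.vb
     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     /* ... the values of B change, its pattern stays fixed ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve
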
4815     Level: developer
4816 
4817 @*/
4818 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4819 {
4820   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4821   PetscErrorCode    ierr;
4822   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4823   IS                isrowb,iscolb;
4824   Mat               *bseq;
4825 
4826   PetscFunctionBegin;
4827   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4828     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4829   }
4830   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4831 
4832   if (scall == MAT_INITIAL_MATRIX){
4833     start = A->cmap->rstart;
4834     cmap  = a->garray;
4835     nzA   = a->A->cmap->n;
4836     nzB   = a->B->cmap->n;
4837     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4838     ncols = 0;
4839     for (i=0; i<nzB; i++) {  /* row < local row index */
4840       if (cmap[i] < start) idx[ncols++] = cmap[i];
4841       else break;
4842     }
4843     imark = i;
4844     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4845     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4846     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4847     ierr = PetscFree(idx);CHKERRQ(ierr);
4848     *brstart = imark;
4849     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4850   } else {
4851     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4852     isrowb = *rowb; iscolb = *colb;
4853     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4854     bseq[0] = *B_seq;
4855   }
4856   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4857   *B_seq = bseq[0];
4858   ierr = PetscFree(bseq);CHKERRQ(ierr);
4859   if (!rowb){
4860     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4861   } else {
4862     *rowb = isrowb;
4863   }
4864   if (!colb){
4865     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4866   } else {
4867     *colb = iscolb;
4868   }
4869   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4870   PetscFunctionReturn(0);
4871 }
4872 
4873 #undef __FUNCT__
4874 #define __FUNCT__ "MatGetBrowsOfAoCols"
4875 /*@C
4876     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4877     of the OFF-DIAGONAL portion of the local part of A
4878 
4879     Collective on Mat
4880 
4881    Input Parameters:
4882 +    A,B - the matrices in mpiaij format
4883 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4884 .    startsj - starting points in B's outgoing j- and a-arrays, saved for MAT_REUSE (or PETSC_NULL)
4885 .    startsj_r - similar to startsj, for the receiving side
4886 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4887 
4888    Output Parameter:
4889 .    B_oth - the sequential matrix generated
4890 
4891     Level: developer
4892 
4893 @*/
4894 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4895 {
4896   VecScatter_MPI_General *gen_to,*gen_from;
4897   PetscErrorCode         ierr;
4898   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4899   Mat_SeqAIJ             *b_oth;
4900   VecScatter             ctx=a->Mvctx;
4901   MPI_Comm               comm=((PetscObject)ctx)->comm;
4902   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4903   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4904   PetscScalar            *rvalues,*svalues;
4905   MatScalar              *b_otha,*bufa,*bufA;
4906   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4907   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4908   MPI_Status             *sstatus,rstatus;
4909   PetscMPIInt            jj;
4910   PetscInt               *cols,sbs,rbs;
4911   PetscScalar            *vals;
4912 
4913   PetscFunctionBegin;
4914   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4915     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4916   }
4917   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4918   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4919 
4920   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4921   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4922   rvalues  = gen_from->values; /* holds the length of receiving row */
4923   svalues  = gen_to->values;   /* holds the length of sending row */
4924   nrecvs   = gen_from->n;
4925   nsends   = gen_to->n;
4926 
4927   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4928   srow     = gen_to->indices;   /* local row index to be sent */
4929   sstarts  = gen_to->starts;
4930   sprocs   = gen_to->procs;
4931   sstatus  = gen_to->sstatus;
4932   sbs      = gen_to->bs;
4933   rstarts  = gen_from->starts;
4934   rprocs   = gen_from->procs;
4935   rbs      = gen_from->bs;
4936 
4937   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4938   if (scall == MAT_INITIAL_MATRIX){
4939     /* i-array */
4940     /*---------*/
4941     /*  post receives */
4942     for (i=0; i<nrecvs; i++){
4943       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4944       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4945       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4946     }
4947 
4948     /* pack the outgoing message */
4949     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4950     sstartsj[0] = 0;  rstartsj[0] = 0;
4951     len = 0; /* total length of j or a array to be sent */
4952     k = 0;
4953     for (i=0; i<nsends; i++){
4954       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4955       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4956       for (j=0; j<nrows; j++) {
4957         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4958         for (l=0; l<sbs; l++){
4959           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4960           rowlen[j*sbs+l] = ncols;
4961           len += ncols;
4962           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4963         }
4964         k++;
4965       }
4966       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4967       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4968     }
4969     /* recvs and sends of i-array are completed */
4970     i = nrecvs;
4971     while (i--) {
4972       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4973     }
4974     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4975 
4976     /* allocate buffers for sending j and a arrays */
4977     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4978     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4979 
4980     /* create i-array of B_oth */
4981     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4982     b_othi[0] = 0;
4983     len = 0; /* total length of j or a array to be received */
4984     k = 0;
4985     for (i=0; i<nrecvs; i++){
4986       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4987       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4988       for (j=0; j<nrows; j++) {
4989         b_othi[k+1] = b_othi[k] + rowlen[j];
4990         len += rowlen[j]; k++;
4991       }
4992       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4993     }
4994 
4995     /* allocate space for j and a arrays of B_oth */
4996     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4997     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4998 
4999     /* j-array */
5000     /*---------*/
5001     /*  post receives of j-array */
5002     for (i=0; i<nrecvs; i++){
5003       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5004       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5005     }
5006 
5007     /* pack the outgoing message j-array */
5008     k = 0;
5009     for (i=0; i<nsends; i++){
5010       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5011       bufJ = bufj+sstartsj[i];
5012       for (j=0; j<nrows; j++) {
5013         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5014         for (ll=0; ll<sbs; ll++){
5015           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5016           for (l=0; l<ncols; l++){
5017             *bufJ++ = cols[l];
5018           }
5019           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5020         }
5021       }
5022       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5023     }
5024 
5025     /* recvs and sends of j-array are completed */
5026     i = nrecvs;
5027     while (i--) {
5028       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5029     }
5030     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5031   } else if (scall == MAT_REUSE_MATRIX){
5032     sstartsj = *startsj;
5033     rstartsj = *startsj_r;
5034     bufa     = *bufa_ptr;
5035     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5036     b_otha   = b_oth->a;
5037   } else {
5038     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5039   }
5040 
5041   /* a-array */
5042   /*---------*/
5043   /*  post receives of a-array */
5044   for (i=0; i<nrecvs; i++){
5045     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5046     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5047   }
5048 
5049   /* pack the outgoing message a-array */
5050   k = 0;
5051   for (i=0; i<nsends; i++){
5052     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5053     bufA = bufa+sstartsj[i];
5054     for (j=0; j<nrows; j++) {
5055       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5056       for (ll=0; ll<sbs; ll++){
5057         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5058         for (l=0; l<ncols; l++){
5059           *bufA++ = vals[l];
5060         }
5061         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5062       }
5063     }
5064     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5065   }
5066   /* recvs and sends of a-array are completed */
5067   i = nrecvs;
5068   while (i--) {
5069     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5070   }
5071   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5072   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5073 
5074   if (scall == MAT_INITIAL_MATRIX){
5075     /* put together the new matrix */
5076     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5077 
5078     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5079     /* Since these are PETSc arrays, change flags to free them as necessary. */
5080     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5081     b_oth->free_a  = PETSC_TRUE;
5082     b_oth->free_ij = PETSC_TRUE;
5083     b_oth->nonew   = 0;
5084 
5085     ierr = PetscFree(bufj);CHKERRQ(ierr);
5086     if (!startsj || !bufa_ptr){
5087       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5088       ierr = PetscFree(bufa);CHKERRQ(ierr);
5089     } else {
5090       *startsj   = sstartsj;
5091       *startsj_r = rstartsj;
5092       *bufa_ptr  = bufa;
5093     }
5094   }
5095   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5096   PetscFunctionReturn(0);
5097 }
5098 
5099 #undef __FUNCT__
5100 #define __FUNCT__ "MatGetCommunicationStructs"
5101 /*@C
5102   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5103 
5104   Not Collective
5105 
5106   Input Parameters:
5107 . A - The matrix in mpiaij format
5108 
5109   Output Parameter:
5110 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5111 . colmap - A map from global column index to local index into lvec
5112 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5113 
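  Example usage (a sketch; when PETSC_USE_CTABLE is defined, colmap must be
  declared as a PetscTable instead of a PetscInt*):
.vb
     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
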
5114   Level: developer
5115 
5116 @*/
5117 #if defined (PETSC_USE_CTABLE)
5118 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5119 #else
5120 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5121 #endif
5122 {
5123   Mat_MPIAIJ *a;
5124 
5125   PetscFunctionBegin;
5126   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5127   PetscValidPointer(lvec, 2);
5128   PetscValidPointer(colmap, 3);
5129   PetscValidPointer(multScatter, 4);
5130   a = (Mat_MPIAIJ *) A->data;
5131   if (lvec) *lvec = a->lvec;
5132   if (colmap) *colmap = a->colmap;
5133   if (multScatter) *multScatter = a->Mvctx;
5134   PetscFunctionReturn(0);
5135 }
5136 
5137 EXTERN_C_BEGIN
5138 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
5139 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
5140 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5141 EXTERN_C_END
5142 
5143 #undef __FUNCT__
5144 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5145 /*
5146     Computes (B'*A')' since computing B*A directly is untenable
5147 
5148                n                       p                          p
5149         (              )       (              )         (                  )
5150       m (      A       )  *  n (       B      )   =   m (         C        )
5151         (              )       (              )         (                  )
5152 
5153 */
5154 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5155 {
5156   PetscErrorCode     ierr;
5157   Mat                At,Bt,Ct;
5158 
5159   PetscFunctionBegin;
5160   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5161   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5162   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5163   ierr = MatDestroy(At);CHKERRQ(ierr);
5164   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5165   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5166   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5167   PetscFunctionReturn(0);
5168 }
5169 
5170 #undef __FUNCT__
5171 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5172 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5173 {
5174   PetscErrorCode ierr;
5175   PetscInt       m=A->rmap->n,n=B->cmap->n;
5176   Mat            Cmat;
5177 
5178   PetscFunctionBegin;
5179   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5180   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5181   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5182   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5183   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5184   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5185   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5186   *C   = Cmat;
5187   PetscFunctionReturn(0);
5188 }
5189 
5190 /* ----------------------------------------------------------------*/
5191 #undef __FUNCT__
5192 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5193 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5194 {
5195   PetscErrorCode ierr;
5196 
5197   PetscFunctionBegin;
5198   if (scall == MAT_INITIAL_MATRIX){
5199     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5200   }
5201   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5202   PetscFunctionReturn(0);
5203 }
5204 
5205 EXTERN_C_BEGIN
5206 #if defined(PETSC_HAVE_MUMPS)
5207 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5208 #endif
5209 #if defined(PETSC_HAVE_PASTIX)
5210 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5211 #endif
5212 #if defined(PETSC_HAVE_SUPERLU_DIST)
5213 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5214 #endif
5215 #if defined(PETSC_HAVE_SPOOLES)
5216 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5217 #endif
5218 EXTERN_C_END
5219 
5220 /*MC
5221    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5222 
5223    Options Database Keys:
5224 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5225 
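   For example, a sketch of selecting this type from the options database
   (the sizes m, n, M, N are assumed to be set elsewhere); run the program
   with -mat_type mpiaij:
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
.ve
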
5226   Level: beginner
5227 
5228 .seealso: MatCreateMPIAIJ()
5229 M*/
5230 
5231 EXTERN_C_BEGIN
5232 #undef __FUNCT__
5233 #define __FUNCT__ "MatCreate_MPIAIJ"
5234 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5235 {
5236   Mat_MPIAIJ     *b;
5237   PetscErrorCode ierr;
5238   PetscMPIInt    size;
5239 
5240   PetscFunctionBegin;
5241   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5242 
5243   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5244   B->data         = (void*)b;
5245   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5246   B->rmap->bs     = 1;
5247   B->assembled    = PETSC_FALSE;
5248   B->mapping      = 0;
5249 
5250   B->insertmode   = NOT_SET_VALUES;
5251   b->size         = size;
5252   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5253 
5254   /* build cache for off array entries formed */
5255   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5256   b->donotstash  = PETSC_FALSE;
5257   b->colmap      = 0;
5258   b->garray      = 0;
5259   b->roworiented = PETSC_TRUE;
5260 
5261   /* stuff used for matrix vector multiply */
5262   b->lvec      = PETSC_NULL;
5263   b->Mvctx     = PETSC_NULL;
5264 
5265   /* stuff for MatGetRow() */
5266   b->rowindices   = 0;
5267   b->rowvalues    = 0;
5268   b->getrowactive = PETSC_FALSE;
5269 
5270 #if defined(PETSC_HAVE_SPOOLES)
5271   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5272                                      "MatGetFactor_mpiaij_spooles",
5273                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5274 #endif
5275 #if defined(PETSC_HAVE_MUMPS)
5276   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5277                                      "MatGetFactor_aij_mumps",
5278                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5279 #endif
5280 #if defined(PETSC_HAVE_PASTIX)
5281   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5282 					   "MatGetFactor_mpiaij_pastix",
5283 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5284 #endif
5285 #if defined(PETSC_HAVE_SUPERLU_DIST)
5286   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5287                                      "MatGetFactor_mpiaij_superlu_dist",
5288                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5289 #endif
5290   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5291                                      "MatStoreValues_MPIAIJ",
5292                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5293   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5294                                      "MatRetrieveValues_MPIAIJ",
5295                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5296   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5297 				     "MatGetDiagonalBlock_MPIAIJ",
5298                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5299   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5300 				     "MatIsTranspose_MPIAIJ",
5301 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5302   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5303 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5304 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5305   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5306 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5307 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5308   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5309 				     "MatDiagonalScaleLocal_MPIAIJ",
5310 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5311   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5312                                      "MatConvert_MPIAIJ_MPICSRPERM",
5313                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5314   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5315                                      "MatConvert_MPIAIJ_MPICRL",
5316                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5317   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5318                                      "MatConvert_MPIAIJ_MPISBAIJ",
5319                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5320   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5321                                      "MatMatMult_MPIDense_MPIAIJ",
5322                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5323   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5324                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5325                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5326   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5327                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5328                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5329   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5330   PetscFunctionReturn(0);
5331 }
5332 EXTERN_C_END
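
/*
   The "..._C" functions composed above are looked up by name at run time rather
   than being linked directly. A minimal sketch of how a caller might retrieve
   and invoke one of them (illustration only; the result matrix C and ierr are
   assumed to be declared by the caller):

      PetscErrorCode (*conv)(Mat,const MatType,MatReuse,Mat*);
      ierr = PetscObjectQueryFunction((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",(void (**)(void))&conv);CHKERRQ(ierr);
      if (conv) { ierr = (*conv)(B,MATMPICRL,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr); }
*/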
5333 
5334 #undef __FUNCT__
5335 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5336 /*@
5337      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5338          and "off-diagonal" part of the matrix in CSR format.
5339 
5340    Collective on MPI_Comm
5341 
5342    Input Parameters:
5343 +  comm - MPI communicator
5344 .  m - number of local rows (cannot be PETSC_DECIDE)
5345 .  n - number of local columns; this should be the same as the local size used in
5346        creating the x vector for the matrix-vector product y = Ax (or PETSC_DECIDE
5347        to have it calculated if N is given). For square matrices n is almost always m.
5348 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5349 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5350 .  i - row indices for "diagonal" portion of matrix
5351 .  j - column indices for "diagonal" portion of matrix
5352 .  a - matrix values for "diagonal" portion of matrix
5353 .  oi - row indices for "off-diagonal" portion of matrix
5354 .  oj - column indices for "off-diagonal" portion of matrix
5355 -  oa - matrix values for "off-diagonal" portion of matrix
5356 
5357    Output Parameter:
5358 .   mat - the matrix
5359 
5360    Level: advanced
5361 
5362    Notes:
5363        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc (the same is true of oi, oj, and oa).
5364 
5365        The i and j indices are 0 based; i[0] and oi[0] must equal 0 (this is checked below)
5366 
5367        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5368 
5369        This sets local rows and cannot be used to set off-processor values.
5370 
5371        You cannot later use MatSetValues() to change values in this matrix.
5372 
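   Example usage (a minimal sketch for the 2x2 matrix [2 -1; -1 2] on two processes,
   one row per process; "rank" is assumed to come from MPI_Comm_rank(), and the
   arrays must outlive the matrix since they are not copied):
.vb
      PetscInt    i[]  = {0,1}, j[]  = {0};            /* diagonal block: local column indices */
      PetscInt    oi[] = {0,1}, oj[] = {rank ? 0 : 1}; /* off-diagonal block: global column indices */
      PetscScalar a[]  = {2.0}, oa[] = {-1.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
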
5373 .keywords: matrix, aij, compressed row, sparse, parallel
5374 
5375 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5376           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5377 @*/
5378 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5379 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5380 {
5381   PetscErrorCode ierr;
5382   Mat_MPIAIJ     *maij;
5383 
5384   PetscFunctionBegin;
5385   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5386   if (i[0]) {
5387     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5388   }
5389   if (oi[0]) {
5390     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5391   }
5392   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5393   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5394   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5395   maij = (Mat_MPIAIJ*) (*mat)->data;
5396   maij->donotstash     = PETSC_TRUE;
5397   (*mat)->preallocated = PETSC_TRUE;
5398 
5399   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5400   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5401   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5402   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5403 
5404   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5405   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
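  /* A is m x n and uses local column indices; B is created as m x N with global
     column indices, which the final assembly below compacts to B's local
     numbering while building the scatter used for matrix-vector products */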
5406 
5407   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5408   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5409   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5410   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5411 
5412   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5413   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5414   PetscFunctionReturn(0);
5415 }
5416 
5417 /*
5418     Special version for direct calls from Fortran
5419 */
5420 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5421 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5422 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5423 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5424 #endif
5425 
5426 /* Redefine these macros so they can be used in a void function */
5427 #undef CHKERRQ
5428 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5429 #undef SETERRQ2
5430 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5431 #undef SETERRQ
5432 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
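/*
   CHKERRQ() normally expands to a "return" with a value, which is illegal in a
   function returning void; CHKERRABORT() reports the error and aborts on the
   given communicator instead, so the macros above remain usable below.
*/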
5433 
5434 EXTERN_C_BEGIN
5435 #undef __FUNCT__
5436 #define __FUNCT__ "matsetvaluesmpiaij_"
5437 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5438 {
5439   Mat             mat = *mmat;
5440   PetscInt        m = *mm, n = *mn;
5441   InsertMode      addv = *maddv;
5442   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5443   PetscScalar     value;
5444   PetscErrorCode  ierr;
5445 
5446   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5447   if (mat->insertmode == NOT_SET_VALUES) {
5448     mat->insertmode = addv;
5449   }
5450 #if defined(PETSC_USE_DEBUG)
5451   else if (mat->insertmode != addv) {
5452     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5453   }
5454 #endif
5455   {
5456   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5457   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5458   PetscTruth      roworiented = aij->roworiented;
5459 
5460   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
5461   Mat             A = aij->A;
5462   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5463   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5464   MatScalar       *aa = a->a;
5465   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5466   Mat             B = aij->B;
5467   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5468   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5469   MatScalar       *ba = b->a;
5470 
5471   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5472   PetscInt        nonew = a->nonew;
5473   MatScalar       *ap1,*ap2;
5474 
5475   PetscFunctionBegin;
5476   for (i=0; i<m; i++) {
5477     if (im[i] < 0) continue;
5478 #if defined(PETSC_USE_DEBUG)
5479     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5480 #endif
5481     if (im[i] >= rstart && im[i] < rend) {
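      /* locally owned row: set up search windows into both the diagonal (A) and off-diagonal (B) blocks */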
5482       row      = im[i] - rstart;
5483       lastcol1 = -1;
5484       rp1      = aj + ai[row];
5485       ap1      = aa + ai[row];
5486       rmax1    = aimax[row];
5487       nrow1    = ailen[row];
5488       low1     = 0;
5489       high1    = nrow1;
5490       lastcol2 = -1;
5491       rp2      = bj + bi[row];
5492       ap2      = ba + bi[row];
5493       rmax2    = bimax[row];
5494       nrow2    = bilen[row];
5495       low2     = 0;
5496       high2    = nrow2;
5497 
5498       for (j=0; j<n; j++) {
5499         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5500         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
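        /* columns inside [cstart,cend) belong to the diagonal block A; all others go to B */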
5501         if (in[j] >= cstart && in[j] < cend){
5502           col = in[j] - cstart;
5503           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5504         } else if (in[j] < 0) continue;
5505 #if defined(PETSC_USE_DEBUG)
5506         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5507 #endif
5508         else {
5509           if (mat->was_assembled) {
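            /* once assembled, B stores compacted local column indices; translate the global column through the colmap */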
5510             if (!aij->colmap) {
5511               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5512             }
5513 #if defined (PETSC_USE_CTABLE)
5514             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5515             col--;
5516 #else
5517             col = aij->colmap[in[j]] - 1;
5518 #endif
5519             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
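              /* global column not present in B's compacted numbering: convert B back to global numbering so the entry can be inserted */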
5520               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5521               col =  in[j];
5522               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5523               B = aij->B;
5524               b = (Mat_SeqAIJ*)B->data;
5525               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5526               rp2      = bj + bi[row];
5527               ap2      = ba + bi[row];
5528               rmax2    = bimax[row];
5529               nrow2    = bilen[row];
5530               low2     = 0;
5531               high2    = nrow2;
5532               bm       = aij->B->rmap->n;
5533               ba       = b->a;
5534             }
5535           } else col = in[j];
5536           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5537         }
5538       }
5539     } else {
5540       if (!aij->donotstash) {
5541         if (roworiented) {
5542           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5543         } else {
5544           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5545         }
5546       }
5547     }
5548   }}
5549   PetscFunctionReturnVoid();
5550 }
5551 EXTERN_C_END
5552 
5553