#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
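/*
   Usage sketch (illustrative): assumes gmat is a SeqAIJ matrix fully
   assembled on process 0 (it is not referenced on the other processes) and
   m is the number of rows this process is to own, with the m's summing to
   the global size:

      Mat dmat;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      (change the numerical values of gmat on process 0, keeping its nonzero pattern)
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/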
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order N integer array) but is fast to access.
*/
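/*
   Illustrative example: if the off-diagonal block has garray = {3,7,12}, so
   that local columns 0,1,2 correspond to global columns 3,7,12, then colmap
   records 3 -> 1, 7 -> 2, 12 -> 3; the stored values are shifted by one so
   that a lookup result of 0 means "global column not present locally", and
   callers recover the local column as colmap[gcol]-1 (with PETSC_USE_CTABLE
   the keys are shifted by one as well).
*/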
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

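/*
   The two macros below insert a single value into the diagonal (A) or
   off-diagonal (B) block of one row, whose column indices are kept sorted:
   a short binary search narrows the range [low,high), a linear scan locates
   the column, and if it is absent the later entries of the row are shifted
   up to make room (reallocating first via MatSeqXAIJReallocateAIJ() if the
   row is full). lastcol remembers the previously inserted column so the
   search range can be reused when the columns arrive in increasing order.
*/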
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
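/*
   Note: v must contain the complete row in global column order, i.e. first
   the off-diagonal entries to the left of the diagonal block, then the
   diagonal block entries, then the off-diagonal entries to the right; the
   three PetscMemcpy() calls below split v accordingly.
*/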
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required by the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscBool      ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processes are in the same (INSERT_VALUES or ADD_VALUES) mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processes inserted values while others added");
  mat->insertmode = addv; /* in case this process had no cached values */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
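/*
   Communication pattern, summarized: each process determines the owner of
   every global row it was asked to zero, ships those row indices to the
   owning processes with nonblocking sends and receives (message counts and
   the maximum message length are obtained with PetscMaxSum()), converts the
   received indices to local numbering, and only then zeros its local rows of
   the diagonal and off-diagonal blocks.
*/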
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_USE_DEBUG)
  PetscBool      found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_USE_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_USE_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory with lots of mallocs and slow
     things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

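/*
   In MatMult below, y = A x is computed as y = A_d x_local + A_o x_ghost:
   the scatter of the needed ghost values of x into lvec is started, the
   diagonal block product is performed while those messages are in flight,
   and the off-diagonal block then multiplies the gathered ghost values and
   adds into y, overlapping communication with computation.
*/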
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

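/*
   For the transpose product y = A^T x, the off-diagonal contribution
   A_o^T x is computed into lvec and its entries are added back to their
   owning processes with a reverse scatter, while the diagonal contribution
   A_d^T x is computed locally. When the scatter is "merged" the
   communication completes inside VecScatterBegin(), so the local product is
   done first and the trailing VecScatterEnd() is only a required no-op.
*/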
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this no-op */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  if (aij->A) {ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
  if (aij->B) {ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)  {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
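/*
   On-disk layout produced below: a four-entry header {MAT_FILE_CLASSID,
   global rows M, global columns N, total number of nonzeros}, then the row
   lengths of every row, then the global column indices of every row in
   order, then the matching numerical values; process 0 receives each block
   from the other processes in rank order and writes the file.
*/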
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo  info;
      PetscInt *inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
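/*
   Each parallel iteration below is: scatter the current x to obtain its
   ghost values, form the modified right-hand side bb1 = bb - B*x_ghost with
   the off-diagonal block, then run the requested sequential SOR sweep on the
   diagonal block with bb1. The SOR_EISENSTAT branch instead applies
   Eisenstat's trick, combining a local backward sweep, a diagonal
   correction, and a local forward sweep.
*/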
1212 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1213 {
1214   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1215   PetscErrorCode ierr;
1216   Vec            bb1 = 0;
1217   PetscBool      hasop;
1218 
1219   PetscFunctionBegin;
1220   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1221     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1222   }
1223 
1224   if (flag == SOR_APPLY_UPPER) {
1225     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1226     PetscFunctionReturn(0);
1227   }
1228 
1229   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1230     if (flag & SOR_ZERO_INITIAL_GUESS) {
1231       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1232       its--;
1233     }
1234 
1235     while (its--) {
1236       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1237       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238 
1239       /* update rhs: bb1 = bb - B*x */
1240       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1241       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1242 
1243       /* local sweep */
1244       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1245     }
1246   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1247     if (flag & SOR_ZERO_INITIAL_GUESS) {
1248       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1249       its--;
1250     }
1251     while (its--) {
1252       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1253       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1254 
1255       /* update rhs: bb1 = bb - B*x */
1256       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1257       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1258 
1259       /* local sweep */
1260       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1261     }
1262   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1263     if (flag & SOR_ZERO_INITIAL_GUESS) {
1264       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1265       its--;
1266     }
1267     while (its--) {
1268       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1270 
1271       /* update rhs: bb1 = bb - B*x */
1272       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1273       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1274 
1275       /* local sweep */
1276       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1277     }
1278   }  else if (flag & SOR_EISENSTAT) {
1279     Vec         xx1;
1280 
1281     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1282     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1283 
1284     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1285     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1286     if (!mat->diag) {
1287       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1288       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1289     }
1290     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1291     if (hasop) {
1292       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1293     } else {
1294       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1295     }
1296     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1297 
1298     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1299 
1300     /* local sweep */
1301     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1302     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1303     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1304   } else {
1305     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1306   }
1307 
1308   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1309   PetscFunctionReturn(0);
1310 }
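
/*
   Illustrative sketch (guarded with #if 0 so it does not affect the build): how a
   caller might reach the SOR kernel above through the public MatSOR() interface.
   Names are hypothetical; A, b and x are assumed to be an assembled MPIAIJ matrix
   and conforming vectors created elsewhere.
*/
#if 0
static PetscErrorCode ExampleLocalSymmetricSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* two outer iterations of a local symmetric sweep, omega = 1.0, zero initial guess */
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif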
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatPermute_MPIAIJ"
1314 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1315 {
1316   MPI_Comm       comm,pcomm;
1317   PetscInt       first,local_size,nrows;
1318   const PetscInt *rows;
1319   PetscMPIInt    size;
1320   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1321   PetscErrorCode ierr;
1322 
1323   PetscFunctionBegin;
1324   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1325   /* make a collective version of 'rowp' */
1326   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1327   if (pcomm==comm) {
1328     crowp = rowp;
1329   } else {
1330     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1331     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1332     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1333     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1334   }
1335   /* collect the global row permutation and invert it */
1336   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1337   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1338   if (pcomm!=comm) {
1339     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1340   }
1341   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1342   /* get the local target indices */
1343   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1344   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1345   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1346   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1347   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1348   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1349   /* the column permutation is so much easier;
1350      make a local version of 'colp' and invert it */
1351   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1352   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1353   if (size==1) {
1354     lcolp = colp;
1355   } else {
1356     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1357     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1358     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1359   }
1360   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1361   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1362   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1363   if (size>1) {
1364     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1365     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1366   }
1367   /* now we just get the submatrix */
1368   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1369   /* clean up */
1370   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1371   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
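
/*
   Illustrative sketch (#if 0 guarded): building identity row and column permutations
   and passing them through the public MatPermute(), which dispatches to the routine
   above for MPIAIJ matrices. The row IS is distributed like the rows of A; the column
   IS is given redundantly on each process. Names are hypothetical.
*/
#if 0
static PetscErrorCode ExampleIdentityPermutation(Mat A,Mat *B)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,N;
  IS             rowp,colp;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
  ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
  ierr = ISSetPermutation(rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&colp);CHKERRQ(ierr);
  ierr = ISSetPermutation(colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,B);CHKERRQ(ierr);
  ierr = ISDestroy(rowp);CHKERRQ(ierr);
  ierr = ISDestroy(colp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif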
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1377 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1378 {
1379   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1380   Mat            A = mat->A,B = mat->B;
1381   PetscErrorCode ierr;
1382   PetscReal      isend[5],irecv[5];
1383 
1384   PetscFunctionBegin;
1385   info->block_size     = 1.0;
1386   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1387   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1388   isend[3] = info->memory;  isend[4] = info->mallocs;
1389   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1390   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1391   isend[3] += info->memory;  isend[4] += info->mallocs;
1392   if (flag == MAT_LOCAL) {
1393     info->nz_used      = isend[0];
1394     info->nz_allocated = isend[1];
1395     info->nz_unneeded  = isend[2];
1396     info->memory       = isend[3];
1397     info->mallocs      = isend[4];
1398   } else if (flag == MAT_GLOBAL_MAX) {
1399     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1400     info->nz_used      = irecv[0];
1401     info->nz_allocated = irecv[1];
1402     info->nz_unneeded  = irecv[2];
1403     info->memory       = irecv[3];
1404     info->mallocs      = irecv[4];
1405   } else if (flag == MAT_GLOBAL_SUM) {
1406     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1407     info->nz_used      = irecv[0];
1408     info->nz_allocated = irecv[1];
1409     info->nz_unneeded  = irecv[2];
1410     info->memory       = irecv[3];
1411     info->mallocs      = irecv[4];
1412   }
1413   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1414   info->fill_ratio_needed = 0;
1415   info->factor_mallocs    = 0;
1416 
1417   PetscFunctionReturn(0);
1418 }
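
/*
   Illustrative sketch (#if 0 guarded): querying the statistics that the routine
   above sums over the diagonal and off-diagonal blocks and reduces over the
   communicator. Names are hypothetical.
*/
#if 0
static PetscErrorCode ExamplePrintMatInfo(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g, allocated %g, mallocs %g\n",
                     info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif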
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatSetOption_MPIAIJ"
1422 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1423 {
1424   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   switch (op) {
1429   case MAT_NEW_NONZERO_LOCATIONS:
1430   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1431   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1432   case MAT_KEEP_NONZERO_PATTERN:
1433   case MAT_NEW_NONZERO_LOCATION_ERR:
1434   case MAT_USE_INODES:
1435   case MAT_IGNORE_ZERO_ENTRIES:
1436     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1437     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1438     break;
1439   case MAT_ROW_ORIENTED:
1440     a->roworiented = flg;
1441     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1442     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1443     break;
1444   case MAT_NEW_DIAGONALS:
1445     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1446     break;
1447   case MAT_IGNORE_OFF_PROC_ENTRIES:
1448     a->donotstash = PETSC_TRUE;
1449     break;
1450   case MAT_SPD:
1451     A->spd_set                         = PETSC_TRUE;
1452     A->spd                             = flg;
1453     if (flg) {
1454       A->symmetric                     = PETSC_TRUE;
1455       A->structurally_symmetric        = PETSC_TRUE;
1456       A->symmetric_set                 = PETSC_TRUE;
1457       A->structurally_symmetric_set    = PETSC_TRUE;
1458     }
1459     break;
1460   case MAT_SYMMETRIC:
1461     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1462     break;
1463   case MAT_STRUCTURALLY_SYMMETRIC:
1464     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1465     break;
1466   case MAT_HERMITIAN:
1467     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1468     break;
1469   case MAT_SYMMETRY_ETERNAL:
1470     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1471     break;
1472   default:
1473     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1474   }
1475   PetscFunctionReturn(0);
1476 }
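
/*
   Illustrative sketch (#if 0 guarded): typical option settings handled by the switch
   above; each call is forwarded to the diagonal and/or off-diagonal blocks as shown.
*/
#if 0
static PetscErrorCode ExampleSetOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif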
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatGetRow_MPIAIJ"
1480 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1481 {
1482   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1483   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1484   PetscErrorCode ierr;
1485   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1486   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1487   PetscInt       *cmap,*idx_p;
1488 
1489   PetscFunctionBegin;
1490   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1491   mat->getrowactive = PETSC_TRUE;
1492 
1493   if (!mat->rowvalues && (idx || v)) {
1494     /*
1495         allocate enough space to hold information from the longest row.
1496     */
1497     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1498     PetscInt   max = 1,tmp;
1499     for (i=0; i<matin->rmap->n; i++) {
1500       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1501       if (max < tmp) { max = tmp; }
1502     }
1503     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1504   }
1505 
1506   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1507   lrow = row - rstart;
1508 
1509   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1510   if (!v)   {pvA = 0; pvB = 0;}
1511   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1512   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1513   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1514   nztot = nzA + nzB;
1515 
1516   cmap  = mat->garray;
1517   if (v  || idx) {
1518     if (nztot) {
1519       /* Sort by increasing column numbers, assuming A and B already sorted */
1520       PetscInt imark = -1;
1521       if (v) {
1522         *v = v_p = mat->rowvalues;
1523         for (i=0; i<nzB; i++) {
1524           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1525           else break;
1526         }
1527         imark = i;
1528         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1529         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1530       }
1531       if (idx) {
1532         *idx = idx_p = mat->rowindices;
1533         if (imark > -1) {
1534           for (i=0; i<imark; i++) {
1535             idx_p[i] = cmap[cworkB[i]];
1536           }
1537         } else {
1538           for (i=0; i<nzB; i++) {
1539             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1540             else break;
1541           }
1542           imark = i;
1543         }
1544         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1545         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1546       }
1547     } else {
1548       if (idx) *idx = 0;
1549       if (v)   *v   = 0;
1550     }
1551   }
1552   *nz = nztot;
1553   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1554   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
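
/*
   Illustrative sketch (#if 0 guarded): the standard MatGetRow()/MatRestoreRow() loop
   that drives the routine above. Only locally owned rows may be requested, and only
   one row may be held at a time (enforced via getrowactive).
*/
#if 0
static PetscErrorCode ExampleSumLocalRows(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols,j;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       sum;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    for (sum=0.0,j=0; j<ncols; j++) sum += vals[j];
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif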
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1560 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1561 {
1562   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1563 
1564   PetscFunctionBegin;
1565   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1566   aij->getrowactive = PETSC_FALSE;
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatNorm_MPIAIJ"
1572 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1573 {
1574   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1575   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1576   PetscErrorCode ierr;
1577   PetscInt       i,j,cstart = mat->cmap->rstart;
1578   PetscReal      sum = 0.0;
1579   MatScalar      *v;
1580 
1581   PetscFunctionBegin;
1582   if (aij->size == 1) {
1583     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1584   } else {
1585     if (type == NORM_FROBENIUS) {
1586       v = amat->a;
1587       for (i=0; i<amat->nz; i++) {
1588 #if defined(PETSC_USE_COMPLEX)
1589         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1590 #else
1591         sum += (*v)*(*v); v++;
1592 #endif
1593       }
1594       v = bmat->a;
1595       for (i=0; i<bmat->nz; i++) {
1596 #if defined(PETSC_USE_COMPLEX)
1597         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1598 #else
1599         sum += (*v)*(*v); v++;
1600 #endif
1601       }
1602       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1603       *norm = sqrt(*norm);
1604     } else if (type == NORM_1) { /* max column norm */
1605       PetscReal *tmp,*tmp2;
1606       PetscInt  *jj,*garray = aij->garray;
1607       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1608       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1609       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1610       *norm = 0.0;
1611       v = amat->a; jj = amat->j;
1612       for (j=0; j<amat->nz; j++) {
1613         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1614       }
1615       v = bmat->a; jj = bmat->j;
1616       for (j=0; j<bmat->nz; j++) {
1617         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1618       }
1619       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1620       for (j=0; j<mat->cmap->N; j++) {
1621         if (tmp2[j] > *norm) *norm = tmp2[j];
1622       }
1623       ierr = PetscFree(tmp);CHKERRQ(ierr);
1624       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1625     } else if (type == NORM_INFINITY) { /* max row norm */
1626       PetscReal ntemp = 0.0;
1627       for (j=0; j<aij->A->rmap->n; j++) {
1628         v = amat->a + amat->i[j];
1629         sum = 0.0;
1630         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1631           sum += PetscAbsScalar(*v); v++;
1632         }
1633         v = bmat->a + bmat->i[j];
1634         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1635           sum += PetscAbsScalar(*v); v++;
1636         }
1637         if (sum > ntemp) ntemp = sum;
1638       }
1639       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1640     } else {
1641       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1642     }
1643   }
1644   PetscFunctionReturn(0);
1645 }
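
/*
   Illustrative sketch (#if 0 guarded): the three norms the routine above supports;
   the 2-norm is not available in parallel.
*/
#if 0
static PetscErrorCode ExampleMatrixNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      fro,one,inf;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_FROBENIUS,&fro);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_1,&one);CHKERRQ(ierr);        /* max column sum */
  ierr = MatNorm(A,NORM_INFINITY,&inf);CHKERRQ(ierr); /* max row sum */
  PetscFunctionReturn(0);
}
#endif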
1646 
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatTranspose_MPIAIJ"
1649 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1650 {
1651   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1652   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1653   PetscErrorCode ierr;
1654   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1655   PetscInt       cstart=A->cmap->rstart,ncol;
1656   Mat            B;
1657   MatScalar      *array;
1658 
1659   PetscFunctionBegin;
1660   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1661 
1662   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1663   ai = Aloc->i; aj = Aloc->j;
1664   bi = Bloc->i; bj = Bloc->j;
1665   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1666     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1667     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1668     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1669     for (i=0; i<ai[ma]; i++){
1670       d_nnz[aj[i]] ++;
1671       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1672     }
1673 
1674     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1675     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1676     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1677     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1678     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1679   } else {
1680     B = *matout;
1681   }
1682 
1683   /* copy over the A part */
1684   array = Aloc->a;
1685   row = A->rmap->rstart;
1686   for (i=0; i<ma; i++) {
1687     ncol = ai[i+1]-ai[i];
1688     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1689     row++; array += ncol; aj += ncol;
1690   }
1691   aj = Aloc->j;
1692   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
1693 
1694   /* copy over the B part */
1695   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1696   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1697   array = Bloc->a;
1698   row = A->rmap->rstart;
1699   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1700   cols_tmp = cols;
1701   for (i=0; i<mb; i++) {
1702     ncol = bi[i+1]-bi[i];
1703     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1704     row++; array += ncol; cols_tmp += ncol;
1705   }
1706   ierr = PetscFree(cols);CHKERRQ(ierr);
1707 
1708   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1709   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1710   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1711     *matout = B;
1712   } else {
1713     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1714   }
1715   PetscFunctionReturn(0);
1716 }
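
/*
   Illustrative sketch (#if 0 guarded): out-of-place and in-place transposes through
   the public MatTranspose(); the in-place form requires a square matrix, as checked
   at the top of the routine above.
*/
#if 0
static PetscErrorCode ExampleTranspose(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); /* in place */
  PetscFunctionReturn(0);
}
#endif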
1717 
1718 #undef __FUNCT__
1719 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1720 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1721 {
1722   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1723   Mat            a = aij->A,b = aij->B;
1724   PetscErrorCode ierr;
1725   PetscInt       s1,s2,s3;
1726 
1727   PetscFunctionBegin;
1728   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1729   if (rr) {
1730     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1731     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1732     /* Overlap communication with computation. */
1733     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1734   }
1735   if (ll) {
1736     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1737     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1738     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1739   }
1740   /* scale  the diagonal block */
1741   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1742 
1743   if (rr) {
1744     /* Do a scatter end and then right scale the off-diagonal block */
1745     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1746     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1747   }
1748 
1749   PetscFunctionReturn(0);
1750 }
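
/*
   Illustrative sketch (#if 0 guarded): left/right diagonal scaling, A <- diag(ll)*A*diag(rr).
   Either vector may be PETSC_NULL to scale one side only; ll conforms to the rows and
   rr to the columns of A. Names are hypothetical.
*/
#if 0
static PetscErrorCode ExampleScaleBothSides(Mat A,Vec ll,Vec rr)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif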
1751 
1752 #undef __FUNCT__
1753 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1754 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1755 {
1756   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1757   PetscErrorCode ierr;
1758 
1759   PetscFunctionBegin;
1760   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1761   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1762   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1763   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1764   PetscFunctionReturn(0);
1765 }
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1768 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1769 {
1770   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1771   PetscErrorCode ierr;
1772 
1773   PetscFunctionBegin;
1774   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatEqual_MPIAIJ"
1780 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
1781 {
1782   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1783   Mat            a,b,c,d;
1784   PetscBool      flg;
1785   PetscErrorCode ierr;
1786 
1787   PetscFunctionBegin;
1788   a = matA->A; b = matA->B;
1789   c = matB->A; d = matB->B;
1790 
1791   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1792   if (flg) {
1793     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1794   }
1795   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatCopy_MPIAIJ"
1801 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1802 {
1803   PetscErrorCode ierr;
1804   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1805   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1806 
1807   PetscFunctionBegin;
1808   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1809   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1810     /* because of the column compression in the off-processor part of the matrix a->B,
1811        the number of columns in a->B and b->B may be different, hence we cannot call
1812        the MatCopy() directly on the two parts. If need be, we can provide a more
1813        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1814        then copying the submatrices */
1815     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1816   } else {
1817     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1818     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1819   }
1820   PetscFunctionReturn(0);
1821 }
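
/*
   Illustrative sketch (#if 0 guarded): the fast path above requires two matrices with
   identical copy implementations and SAME_NONZERO_PATTERN, e.g. a structural duplicate.
*/
#if 0
static PetscErrorCode ExampleFastCopy(Mat A)
{
  PetscErrorCode ierr;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
  ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif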
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1825 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1826 {
1827   PetscErrorCode ierr;
1828 
1829   PetscFunctionBegin;
1830   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatAXPY_MPIAIJ"
1836 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1837 {
1838   PetscErrorCode ierr;
1839   PetscInt       i;
1840   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1841   PetscBLASInt   bnz,one=1;
1842   Mat_SeqAIJ     *x,*y;
1843 
1844   PetscFunctionBegin;
1845   if (str == SAME_NONZERO_PATTERN) {
1846     PetscScalar alpha = a;
1847     x = (Mat_SeqAIJ *)xx->A->data;
1848     y = (Mat_SeqAIJ *)yy->A->data;
1849     bnz = PetscBLASIntCast(x->nz);
1850     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1851     x = (Mat_SeqAIJ *)xx->B->data;
1852     y = (Mat_SeqAIJ *)yy->B->data;
1853     bnz = PetscBLASIntCast(x->nz);
1854     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1855   } else if (str == SUBSET_NONZERO_PATTERN) {
1856     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1857 
1858     x = (Mat_SeqAIJ *)xx->B->data;
1859     y = (Mat_SeqAIJ *)yy->B->data;
1860     if (y->xtoy && y->XtoY != xx->B) {
1861       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1862       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1863     }
1864     if (!y->xtoy) { /* get xtoy */
1865       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1866       y->XtoY = xx->B;
1867       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1868     }
1869     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1870   } else {
1871     Mat B;
1872     PetscInt *nnz_d,*nnz_o;
1873     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
1874     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
1875     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
1876     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1877     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1878     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
1879     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1880     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
1881     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
1882     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1883     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1884     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1885     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1886   }
1887   PetscFunctionReturn(0);
1888 }
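
/*
   Illustrative sketch (#if 0 guarded): Y = Y + a*X through the public MatAXPY().
   SAME_NONZERO_PATTERN takes the BLAS axpy fast path above; DIFFERENT_NONZERO_PATTERN
   falls back to building a freshly preallocated replacement for Y.
*/
#if 0
static PetscErrorCode ExampleAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif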
1889 
1890 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatConjugate_MPIAIJ"
1894 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1895 {
1896 #if defined(PETSC_USE_COMPLEX)
1897   PetscErrorCode ierr;
1898   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1899 
1900   PetscFunctionBegin;
1901   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1902   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1903 #else
1904   PetscFunctionBegin;
1905 #endif
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 #undef __FUNCT__
1910 #define __FUNCT__ "MatRealPart_MPIAIJ"
1911 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1912 {
1913   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1914   PetscErrorCode ierr;
1915 
1916   PetscFunctionBegin;
1917   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1918   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1924 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1925 {
1926   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1927   PetscErrorCode ierr;
1928 
1929   PetscFunctionBegin;
1930   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1931   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #ifdef PETSC_HAVE_PBGL
1936 
1937 #include <boost/parallel/mpi/bsp_process_group.hpp>
1938 #include <boost/graph/distributed/ilu_default_graph.hpp>
1939 #include <boost/graph/distributed/ilu_0_block.hpp>
1940 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1941 #include <boost/graph/distributed/petsc/interface.hpp>
1942 #include <boost/multi_array.hpp>
1943 #include <boost/parallel/distributed_property_map.hpp>
1944 
1945 #undef __FUNCT__
1946 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1947 /*
1948   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1949 */
1950 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1951 {
1952   namespace petsc = boost::distributed::petsc;
1953 
1954   namespace graph_dist = boost::graph::distributed;
1955   using boost::graph::distributed::ilu_default::process_group_type;
1956   using boost::graph::ilu_permuted;
1957 
1958   PetscBool       row_identity, col_identity;
1959   PetscContainer  c;
1960   PetscInt        m, n, M, N;
1961   PetscErrorCode  ierr;
1962 
1963   PetscFunctionBegin;
1964   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1965   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1966   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1967   if (!row_identity || !col_identity) {
1968     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1969   }
1970 
1971   process_group_type pg;
1972   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1973   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1974   lgraph_type&   level_graph = *lgraph_p;
1975   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1976 
1977   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1978   ilu_permuted(level_graph);
1979 
1980   /* put together the new matrix */
1981   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1982   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1983   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1984   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1985   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1986   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1987   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988 
1989   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1990   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1991   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1992   PetscFunctionReturn(0);
1993 }
1994 
1995 #undef __FUNCT__
1996 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1997 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1998 {
1999   PetscFunctionBegin;
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatSolve_MPIAIJ"
2005 /*
2006   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2007 */
2008 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2009 {
2010   namespace graph_dist = boost::graph::distributed;
2011 
2012   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2013   lgraph_type*   lgraph_p;
2014   PetscContainer c;
2015   PetscErrorCode ierr;
2016 
2017   PetscFunctionBegin;
2018   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2019   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2020   ierr = VecCopy(b, x);CHKERRQ(ierr);
2021 
2022   PetscScalar* array_x;
2023   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2024   PetscInt sx;
2025   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2026 
2027   PetscScalar* array_b;
2028   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2029   PetscInt sb;
2030   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2031 
2032   lgraph_type&   level_graph = *lgraph_p;
2033   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2034 
2035   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2036   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2037                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2038 
2039   typedef boost::iterator_property_map<array_ref_type::iterator,
2040                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2041   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2042                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2043 
2044   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2045 
2046   PetscFunctionReturn(0);
2047 }
2048 #endif
2049 
2050 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2051   PetscInt       nzlocal,nsends,nrecvs;
2052   PetscMPIInt    *send_rank,*recv_rank;
2053   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2054   PetscScalar    *sbuf_a,**rbuf_a;
2055   PetscErrorCode (*MatDestroy)(Mat);
2056 } Mat_Redundant;
2057 
2058 #undef __FUNCT__
2059 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2060 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2061 {
2062   PetscErrorCode       ierr;
2063   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2064   PetscInt             i;
2065 
2066   PetscFunctionBegin;
2067   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2068   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2069   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2070   for (i=0; i<redund->nrecvs; i++){
2071     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2072     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2073   }
2074   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2075   ierr = PetscFree(redund);CHKERRQ(ierr);
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatDestroy_MatRedundant"
2081 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2082 {
2083   PetscErrorCode  ierr;
2084   PetscContainer  container;
2085   Mat_Redundant   *redund=PETSC_NULL;
2086 
2087   PetscFunctionBegin;
2088   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2089   if (container) {
2090     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2091   } else {
2092     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2093   }
2094   A->ops->destroy = redund->MatDestroy;
2095   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2096   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2097   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2098   PetscFunctionReturn(0);
2099 }
2100 
2101 #undef __FUNCT__
2102 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2103 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2104 {
2105   PetscMPIInt    rank,size;
2106   MPI_Comm       comm=((PetscObject)mat)->comm;
2107   PetscErrorCode ierr;
2108   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2109   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2110   PetscInt       *rowrange=mat->rmap->range;
2111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2112   Mat            A=aij->A,B=aij->B,C=*matredundant;
2113   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2114   PetscScalar    *sbuf_a;
2115   PetscInt       nzlocal=a->nz+b->nz;
2116   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2117   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2118   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2119   MatScalar      *aworkA,*aworkB;
2120   PetscScalar    *vals;
2121   PetscMPIInt    tag1,tag2,tag3,imdex;
2122   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2123                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2124   MPI_Status     recv_status,*send_status;
2125   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2126   PetscInt       **rbuf_j=PETSC_NULL;
2127   PetscScalar    **rbuf_a=PETSC_NULL;
2128   Mat_Redundant  *redund=PETSC_NULL;
2129   PetscContainer container;
2130 
2131   PetscFunctionBegin;
2132   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2133   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2134 
2135   if (reuse == MAT_REUSE_MATRIX) {
2136     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2137     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2138     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2139     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2140     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2141     if (container) {
2142       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2143     } else {
2144       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2145     }
2146     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2147 
2148     nsends    = redund->nsends;
2149     nrecvs    = redund->nrecvs;
2150     send_rank = redund->send_rank;
2151     recv_rank = redund->recv_rank;
2152     sbuf_nz   = redund->sbuf_nz;
2153     rbuf_nz   = redund->rbuf_nz;
2154     sbuf_j    = redund->sbuf_j;
2155     sbuf_a    = redund->sbuf_a;
2156     rbuf_j    = redund->rbuf_j;
2157     rbuf_a    = redund->rbuf_a;
2158   }
2159 
2160   if (reuse == MAT_INITIAL_MATRIX){
2161     PetscMPIInt  subrank,subsize;
2162     PetscInt     nleftover,np_subcomm;
2163     /* determine the destination ranks send_rank and the counts nsends and nrecvs */
2164     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2165     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2166     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2167     np_subcomm = size/nsubcomm;
2168     nleftover  = size - nsubcomm*np_subcomm;
2169     nsends = 0; nrecvs = 0;
2170     for (i=0; i<size; i++){ /* i=rank*/
2171       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2172         send_rank[nsends] = i; nsends++;
2173         recv_rank[nrecvs++] = i;
2174       }
2175     }
2176     if (rank >= size - nleftover){/* this proc is a leftover processor */
2177       i = size-nleftover-1;
2178       j = 0;
2179       while (j < nsubcomm - nleftover){
2180         send_rank[nsends++] = i;
2181         i--; j++;
2182       }
2183     }
2184 
2185     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2186       for (i=0; i<nleftover; i++){
2187         recv_rank[nrecvs++] = size-nleftover+i;
2188       }
2189     }
2190 
2191     /* allocate sbuf_j, sbuf_a */
2192     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2193     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2194     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2195   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2196 
2197   /* copy mat's local entries into the buffers */
2198   if (reuse == MAT_INITIAL_MATRIX){
2199     rownz_max = 0;
2200     rptr = sbuf_j;
2201     cols = sbuf_j + rend-rstart + 1;
2202     vals = sbuf_a;
2203     rptr[0] = 0;
2204     for (i=0; i<rend-rstart; i++){
2205       row = i + rstart;
2206       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2207       ncols  = nzA + nzB;
2208       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2209       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2210       /* load the column indices for this row into cols */
2211       lwrite = 0;
2212       for (l=0; l<nzB; l++) {
2213         if ((ctmp = bmap[cworkB[l]]) < cstart){
2214           vals[lwrite]   = aworkB[l];
2215           cols[lwrite++] = ctmp;
2216         }
2217       }
2218       for (l=0; l<nzA; l++){
2219         vals[lwrite]   = aworkA[l];
2220         cols[lwrite++] = cstart + cworkA[l];
2221       }
2222       for (l=0; l<nzB; l++) {
2223         if ((ctmp = bmap[cworkB[l]]) >= cend){
2224           vals[lwrite]   = aworkB[l];
2225           cols[lwrite++] = ctmp;
2226         }
2227       }
2228       vals += ncols;
2229       cols += ncols;
2230       rptr[i+1] = rptr[i] + ncols;
2231       if (rownz_max < ncols) rownz_max = ncols;
2232     }
2233     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2234   } else { /* only copy matrix values into sbuf_a */
2235     rptr = sbuf_j;
2236     vals = sbuf_a;
2237     rptr[0] = 0;
2238     for (i=0; i<rend-rstart; i++){
2239       row = i + rstart;
2240       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2241       ncols  = nzA + nzB;
2242       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2243       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2244       lwrite = 0;
2245       for (l=0; l<nzB; l++) {
2246         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2247       }
2248       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2249       for (l=0; l<nzB; l++) {
2250         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2251       }
2252       vals += ncols;
2253       rptr[i+1] = rptr[i] + ncols;
2254     }
2255   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2256 
2257   /* send nzlocal to others, and recv other's nzlocal */
2258   /*--------------------------------------------------*/
2259   if (reuse == MAT_INITIAL_MATRIX){
2260     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2261     s_waits2 = s_waits3 + nsends;
2262     s_waits1 = s_waits2 + nsends;
2263     r_waits1 = s_waits1 + nsends;
2264     r_waits2 = r_waits1 + nrecvs;
2265     r_waits3 = r_waits2 + nrecvs;
2266   } else {
2267     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2268     r_waits3 = s_waits3 + nsends;
2269   }
2270 
2271   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2272   if (reuse == MAT_INITIAL_MATRIX){
2273     /* get new tags to keep the communication clean */
2274     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2275     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2276     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2277 
2278     /* post receives of other's nzlocal */
2279     for (i=0; i<nrecvs; i++){
2280       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2281     }
2282     /* send nzlocal to others */
2283     for (i=0; i<nsends; i++){
2284       sbuf_nz[i] = nzlocal;
2285       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2286     }
2287     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2288     count = nrecvs;
2289     while (count) {
2290       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2291       recv_rank[imdex] = recv_status.MPI_SOURCE;
2292       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2293       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2294 
2295       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2296       rbuf_nz[imdex] += i + 2;
2297       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2298       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2299       count--;
2300     }
2301     /* wait on sends of nzlocal */
2302     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2303     /* send mat->i,j to others, and recv from other's */
2304     /*------------------------------------------------*/
2305     for (i=0; i<nsends; i++){
2306       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2307       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2308     }
2309     /* wait on receives of mat->i,j */
2310     /*------------------------------*/
2311     count = nrecvs;
2312     while (count) {
2313       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2314       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2315       count--;
2316     }
2317     /* wait on sends of mat->i,j */
2318     /*---------------------------*/
2319     if (nsends) {
2320       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2321     }
2322   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2323 
2324   /* post receives, send and receive mat->a */
2325   /*----------------------------------------*/
2326   for (imdex=0; imdex<nrecvs; imdex++) {
2327     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2328   }
2329   for (i=0; i<nsends; i++){
2330     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2331   }
2332   count = nrecvs;
2333   while (count) {
2334     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2335     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2336     count--;
2337   }
2338   if (nsends) {
2339     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2340   }
2341 
2342   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2343 
2344   /* create redundant matrix */
2345   /*-------------------------*/
2346   if (reuse == MAT_INITIAL_MATRIX){
2347     /* compute rownz_max for preallocation */
2348     for (imdex=0; imdex<nrecvs; imdex++){
2349       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2350       rptr = rbuf_j[imdex];
2351       for (i=0; i<j; i++){
2352         ncols = rptr[i+1] - rptr[i];
2353         if (rownz_max < ncols) rownz_max = ncols;
2354       }
2355     }
2356 
2357     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2358     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2359     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2360     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2361     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2362   } else {
2363     C = *matredundant;
2364   }
2365 
2366   /* insert local matrix entries */
2367   rptr = sbuf_j;
2368   cols = sbuf_j + rend-rstart + 1;
2369   vals = sbuf_a;
2370   for (i=0; i<rend-rstart; i++){
2371     row   = i + rstart;
2372     ncols = rptr[i+1] - rptr[i];
2373     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2374     vals += ncols;
2375     cols += ncols;
2376   }
2377   /* insert received matrix entries */
2378   for (imdex=0; imdex<nrecvs; imdex++){
2379     rstart = rowrange[recv_rank[imdex]];
2380     rend   = rowrange[recv_rank[imdex]+1];
2381     rptr = rbuf_j[imdex];
2382     cols = rbuf_j[imdex] + rend-rstart + 1;
2383     vals = rbuf_a[imdex];
2384     for (i=0; i<rend-rstart; i++){
2385       row   = i + rstart;
2386       ncols = rptr[i+1] - rptr[i];
2387       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2388       vals += ncols;
2389       cols += ncols;
2390     }
2391   }
2392   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2393   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2394   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2395   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2396   if (reuse == MAT_INITIAL_MATRIX){
2397     PetscContainer container;
2398     *matredundant = C;
2399     /* create a supporting struct and attach it to C for reuse */
2400     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2401     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2402     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2403     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2404     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2405 
2406     redund->nzlocal = nzlocal;
2407     redund->nsends  = nsends;
2408     redund->nrecvs  = nrecvs;
2409     redund->send_rank = send_rank;
2410     redund->recv_rank = recv_rank;
2411     redund->sbuf_nz = sbuf_nz;
2412     redund->rbuf_nz = rbuf_nz;
2413     redund->sbuf_j  = sbuf_j;
2414     redund->sbuf_a  = sbuf_a;
2415     redund->rbuf_j  = rbuf_j;
2416     redund->rbuf_a  = rbuf_a;
2417 
2418     redund->MatDestroy = C->ops->destroy;
2419     C->ops->destroy    = MatDestroy_MatRedundant;
2420   }
2421   PetscFunctionReturn(0);
2422 }
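
/*
   Illustrative sketch (#if 0 guarded): gathering a complete copy of a parallel matrix
   onto each subcommunicator through the public MatGetRedundantMatrix(). The
   subcommunicator is assumed to have been created by the caller; with one process per
   subcommunicator, mlocal_sub is the full global row count. Names are hypothetical.
*/
#if 0
static PetscErrorCode ExampleRedundantCopy(Mat A,MPI_Comm subcomm,PetscInt nsubcomm,Mat *Ared)
{
  PetscErrorCode ierr;
  PetscInt       M;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,M,MAT_INITIAL_MATRIX,Ared);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif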
2423 
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2426 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2427 {
2428   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2429   PetscErrorCode ierr;
2430   PetscInt       i,*idxb = 0;
2431   PetscScalar    *va,*vb;
2432   Vec            vtmp;
2433 
2434   PetscFunctionBegin;
2435   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2436   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2437   if (idx) {
2438     for (i=0; i<A->rmap->n; i++) {
2439       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2440     }
2441   }
2442 
2443   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2444   if (idx) {
2445     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2446   }
2447   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2448   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2449 
2450   for (i=0; i<A->rmap->n; i++){
2451     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2452       va[i] = vb[i];
2453       if (idx) idx[i] = a->garray[idxb[i]];
2454     }
2455   }
2456 
2457   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2458   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2459   if (idxb) {
2460     ierr = PetscFree(idxb);CHKERRQ(ierr);
2461   }
2462   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2463   PetscFunctionReturn(0);
2464 }
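
/*
   Illustrative sketch (#if 0 guarded): computing the per-row maxima in absolute value.
   The vector v conforms to the rows of A and idx[] returns the global column index of
   each maximum, merged from the diagonal and off-diagonal blocks as above.
*/
#if 0
static PetscErrorCode ExampleRowMaxAbs(Mat A)
{
  PetscErrorCode ierr;
  Vec            v;
  PetscInt       *idx,n;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&n,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscInt),&idx);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = VecDestroy(v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif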
2465 
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2468 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2469 {
2470   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2471   PetscErrorCode ierr;
2472   PetscInt       i,*idxb = 0;
2473   PetscScalar    *va,*vb;
2474   Vec            vtmp;
2475 
2476   PetscFunctionBegin;
2477   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2478   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2479   if (idx) {
2480     for (i=0; i<A->rmap->n; i++) {
2481       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2482     }
2483   }
2484 
2485   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2486   if (idx) {
2487     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2488   }
2489   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2490   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2491 
2492   for (i=0; i<A->rmap->n; i++){
2493     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2494       va[i] = vb[i];
2495       if (idx) idx[i] = a->garray[idxb[i]];
2496     }
2497   }
2498 
2499   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2500   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2501   if (idxb) {
2502     ierr = PetscFree(idxb);CHKERRQ(ierr);
2503   }
2504   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 #undef __FUNCT__
2509 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2510 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2511 {
2512   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2513   PetscInt       n      = A->rmap->n;
2514   PetscInt       cstart = A->cmap->rstart;
2515   PetscInt      *cmap   = mat->garray;
2516   PetscInt      *diagIdx, *offdiagIdx;
2517   Vec            diagV, offdiagV;
2518   PetscScalar   *a, *diagA, *offdiagA;
2519   PetscInt       r;
2520   PetscErrorCode ierr;
2521 
2522   PetscFunctionBegin;
2523   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2524   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2525   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2526   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2527   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2528   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2529   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2530   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2531   for(r = 0; r < n; ++r) {
2532     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2533       a[r]   = diagA[r];
2534       idx[r] = cstart + diagIdx[r];
2535     } else {
2536       a[r]   = offdiagA[r];
2537       idx[r] = cmap[offdiagIdx[r]];
2538     }
2539   }
2540   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2541   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2542   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2543   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2544   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2545   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2546   PetscFunctionReturn(0);
2547 }
2548 
2549 #undef __FUNCT__
2550 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2551 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2552 {
2553   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2554   PetscInt       n      = A->rmap->n;
2555   PetscInt       cstart = A->cmap->rstart;
2556   PetscInt      *cmap   = mat->garray;
2557   PetscInt      *diagIdx, *offdiagIdx;
2558   Vec            diagV, offdiagV;
2559   PetscScalar   *a, *diagA, *offdiagA;
2560   PetscInt       r;
2561   PetscErrorCode ierr;
2562 
2563   PetscFunctionBegin;
2564   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2565   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2566   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2567   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2568   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2569   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2570   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2571   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2572   for(r = 0; r < n; ++r) {
2573     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2574       a[r]   = diagA[r];
2575       idx[r] = cstart + diagIdx[r];
2576     } else {
2577       a[r]   = offdiagA[r];
2578       idx[r] = cmap[offdiagIdx[r]];
2579     }
2580   }
2581   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2582   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2583   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2584   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2585   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2586   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 #undef __FUNCT__
2591 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2592 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2593 {
2594   PetscErrorCode ierr;
2595   Mat            *dummy;
2596 
2597   PetscFunctionBegin;
2598   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2599   *newmat = *dummy;
2600   ierr = PetscFree(dummy);CHKERRQ(ierr);
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2605 /* -------------------------------------------------------------------*/
2606 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2607        MatGetRow_MPIAIJ,
2608        MatRestoreRow_MPIAIJ,
2609        MatMult_MPIAIJ,
2610 /* 4*/ MatMultAdd_MPIAIJ,
2611        MatMultTranspose_MPIAIJ,
2612        MatMultTransposeAdd_MPIAIJ,
2613 #ifdef PETSC_HAVE_PBGL
2614        MatSolve_MPIAIJ,
2615 #else
2616        0,
2617 #endif
2618        0,
2619        0,
2620 /*10*/ 0,
2621        0,
2622        0,
2623        MatSOR_MPIAIJ,
2624        MatTranspose_MPIAIJ,
2625 /*15*/ MatGetInfo_MPIAIJ,
2626        MatEqual_MPIAIJ,
2627        MatGetDiagonal_MPIAIJ,
2628        MatDiagonalScale_MPIAIJ,
2629        MatNorm_MPIAIJ,
2630 /*20*/ MatAssemblyBegin_MPIAIJ,
2631        MatAssemblyEnd_MPIAIJ,
2632        MatSetOption_MPIAIJ,
2633        MatZeroEntries_MPIAIJ,
2634 /*24*/ MatZeroRows_MPIAIJ,
2635        0,
2636 #ifdef PETSC_HAVE_PBGL
2637        0,
2638 #else
2639        0,
2640 #endif
2641        0,
2642        0,
2643 /*29*/ MatSetUpPreallocation_MPIAIJ,
2644 #ifdef PETSC_HAVE_PBGL
2645        0,
2646 #else
2647        0,
2648 #endif
2649        0,
2650        0,
2651        0,
2652 /*34*/ MatDuplicate_MPIAIJ,
2653        0,
2654        0,
2655        0,
2656        0,
2657 /*39*/ MatAXPY_MPIAIJ,
2658        MatGetSubMatrices_MPIAIJ,
2659        MatIncreaseOverlap_MPIAIJ,
2660        MatGetValues_MPIAIJ,
2661        MatCopy_MPIAIJ,
2662 /*44*/ MatGetRowMax_MPIAIJ,
2663        MatScale_MPIAIJ,
2664        0,
2665        0,
2666        0,
2667 /*49*/ MatSetBlockSize_MPIAIJ,
2668        0,
2669        0,
2670        0,
2671        0,
2672 /*54*/ MatFDColoringCreate_MPIAIJ,
2673        0,
2674        MatSetUnfactored_MPIAIJ,
2675        MatPermute_MPIAIJ,
2676        0,
2677 /*59*/ MatGetSubMatrix_MPIAIJ,
2678        MatDestroy_MPIAIJ,
2679        MatView_MPIAIJ,
2680        0,
2681        0,
2682 /*64*/ 0,
2683        0,
2684        0,
2685        0,
2686        0,
2687 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2688        MatGetRowMinAbs_MPIAIJ,
2689        0,
2690        MatSetColoring_MPIAIJ,
2691 #if defined(PETSC_HAVE_ADIC)
2692        MatSetValuesAdic_MPIAIJ,
2693 #else
2694        0,
2695 #endif
2696        MatSetValuesAdifor_MPIAIJ,
2697 /*75*/ MatFDColoringApply_AIJ,
2698        0,
2699        0,
2700        0,
2701        0,
2702 /*80*/ 0,
2703        0,
2704        0,
2705 /*83*/ MatLoad_MPIAIJ,
2706        0,
2707        0,
2708        0,
2709        0,
2710        0,
2711 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2712        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2713        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2714        MatPtAP_Basic,
2715        MatPtAPSymbolic_MPIAIJ,
2716 /*94*/ MatPtAPNumeric_MPIAIJ,
2717        0,
2718        0,
2719        0,
2720        0,
2721 /*99*/ 0,
2722        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2723        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2724        MatConjugate_MPIAIJ,
2725        0,
2726 /*104*/MatSetValuesRow_MPIAIJ,
2727        MatRealPart_MPIAIJ,
2728        MatImaginaryPart_MPIAIJ,
2729        0,
2730        0,
2731 /*109*/0,
2732        MatGetRedundantMatrix_MPIAIJ,
2733        MatGetRowMin_MPIAIJ,
2734        0,
2735        0,
2736 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2737        0,
2738        0,
2739        0,
2740        0,
2741 /*119*/0,
2742        0,
2743        0,
2744        0,
2745        MatGetMultiProcBlock_MPIAIJ
2746 };
2747 
2748 /* ----------------------------------------------------------------------------------------*/
2749 
2750 EXTERN_C_BEGIN
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2753 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2754 {
2755   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2756   PetscErrorCode ierr;
2757 
2758   PetscFunctionBegin;
2759   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2760   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2761   PetscFunctionReturn(0);
2762 }
2763 EXTERN_C_END
2764 
2765 EXTERN_C_BEGIN
2766 #undef __FUNCT__
2767 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2768 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2769 {
2770   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2771   PetscErrorCode ierr;
2772 
2773   PetscFunctionBegin;
2774   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2775   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2776   PetscFunctionReturn(0);
2777 }
2778 EXTERN_C_END
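
/*
   A minimal usage sketch (not part of this source): the store/retrieve pair
   lets an application stash the numerical values of an assembled matrix and
   restore them later while keeping the nonzero pattern fixed. Assumes an
   assembled Mat A.

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);
     ierr = MatShift(A,1.0);CHKERRQ(ierr);        ... work with the shifted matrix ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);   ... original values are back ...
*/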
2779 
2780 EXTERN_C_BEGIN
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2783 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2784 {
2785   Mat_MPIAIJ     *b;
2786   PetscErrorCode ierr;
2787   PetscInt       i;
2788 
2789   PetscFunctionBegin;
2790   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2791   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2792   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2793   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2794 
2795   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2796   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2797   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2798   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2799   if (d_nnz) {
2800     for (i=0; i<B->rmap->n; i++) {
2801       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2802     }
2803   }
2804   if (o_nnz) {
2805     for (i=0; i<B->rmap->n; i++) {
2806       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2807     }
2808   }
2809   b = (Mat_MPIAIJ*)B->data;
2810 
2811   if (!B->preallocated) {
2812     /* Explicitly create 2 MATSEQAIJ matrices. */
2813     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2814     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2815     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2816     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2817     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2818     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2819     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2820     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2821   }
2822 
2823   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2824   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2825   B->preallocated = PETSC_TRUE;
2826   PetscFunctionReturn(0);
2827 }
2828 EXTERN_C_END
2829 
2830 #undef __FUNCT__
2831 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2832 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2833 {
2834   Mat            mat;
2835   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2836   PetscErrorCode ierr;
2837 
2838   PetscFunctionBegin;
2839   *newmat       = 0;
2840   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2841   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2842   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2843   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2844   a    = (Mat_MPIAIJ*)mat->data;
2845 
2846   mat->factortype    = matin->factortype;
2847   mat->rmap->bs      = matin->rmap->bs;
2848   mat->assembled     = PETSC_TRUE;
2849   mat->insertmode    = NOT_SET_VALUES;
2850   mat->preallocated  = PETSC_TRUE;
2851 
2852   a->size           = oldmat->size;
2853   a->rank           = oldmat->rank;
2854   a->donotstash     = oldmat->donotstash;
2855   a->roworiented    = oldmat->roworiented;
2856   a->rowindices     = 0;
2857   a->rowvalues      = 0;
2858   a->getrowactive   = PETSC_FALSE;
2859 
2860   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2861   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2862 
2863   if (oldmat->colmap) {
2864 #if defined (PETSC_USE_CTABLE)
2865     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2866 #else
2867     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2868     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2869     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2870 #endif
2871   } else a->colmap = 0;
2872   if (oldmat->garray) {
2873     PetscInt len;
2874     len  = oldmat->B->cmap->n;
2875     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2876     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2877     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2878   } else a->garray = 0;
2879 
2880   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2881   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2882   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2883   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2884   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2885   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2886   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2887   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2888   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2889   *newmat = mat;
2890   PetscFunctionReturn(0);
2891 }
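
/*
   A minimal usage sketch (not part of this source): duplicating an assembled
   MPIAIJ matrix A, once with the values copied and once with only the pattern.

     Mat B,C;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&C);CHKERRQ(ierr);
     ierr = MatDestroy(B);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/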
2892 
2893 /*
2894     Allows sending/receiving messages larger than 2 gigabytes in a single call
2895 */
2896 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
2897 {
2898   int             ierr;
2899   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2900   PetscInt        i,numchunks;
2901   PetscMPIInt     icnt;
2902 
2903   numchunks = cnt/CHUNKSIZE + 1;
2904   for (i=0; i<numchunks; i++) {
2905     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2906     ierr = MPI_Send(mess,icnt,type,to,tag,comm);CHKERRQ(ierr);
2907     if (type == MPIU_INT) {
2908       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2909     } else if (type == MPIU_SCALAR) {
2910       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2911     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2912   }
2913   return 0;
2914 }
2915 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
2916 {
2917   int             ierr;
2918   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2919   MPI_Status      status;
2920   PetscInt        i,numchunks;
2921   PetscMPIInt     icnt;
2922 
2923   numchunks = cnt/CHUNKSIZE + 1;
2924   for (i=0; i<numchunks; i++) {
2925     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2926     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);CHKERRQ(ierr);
2927     if (type == MPIU_INT) {
2928       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2929     } else if (type == MPIU_SCALAR) {
2930       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2931     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2932   }
2933   return 0;
2934 }
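
/*
   A worked example of the chunking above (illustration only): for
   cnt = 600,000,000 entries, numchunks = 600000000/250000000 + 1 = 3, so the
   data moves as chunks of 250,000,000, 250,000,000, and 100,000,000 entries,
   each of which fits in the int count argument of MPI_Send()/MPI_Recv().
*/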
2935 
2936 #undef __FUNCT__
2937 #define __FUNCT__ "MatLoad_MPIAIJ"
2938 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2939 {
2940   PetscScalar    *vals,*svals;
2941   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2942   PetscErrorCode ierr;
2943   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2944   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2945   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2946   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2947   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2948   int            fd;
2949 
2950   PetscFunctionBegin;
2951   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2952   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2953   if (!rank) {
2954     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2955     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2956     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2957   }
2958 
2959   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2960 
2961   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2962   M = header[1]; N = header[2];
2963   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2964   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2965   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2966 
2967   /* If global sizes are set, check if they are consistent with that given in the file */
2968   if (sizesset) {
2969     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2970   }
2971   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2972   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2973 
2974   /* determine ownership of all rows */
2975   if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2976   else m = newMat->rmap->n; /* Set by user */
2977 
2978   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2979   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2980 
2981   /* First process needs enough room for process with most rows */
2982   if (!rank) {
2983     mmax = rowners[1];
2984     for (i=2; i<=size; i++) {
2985       mmax = PetscMax(mmax,rowners[i]);
2986     }
2987   } else mmax = m;
2988 
2989   rowners[0] = 0;
2990   for (i=2; i<=size; i++) {
2991     rowners[i] += rowners[i-1];
2992   }
2993   rstart = rowners[rank];
2994   rend   = rowners[rank+1];
2995 
2996   /* distribute row lengths to all processors */
2997   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2998   if (!rank) {
2999     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3000     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3001     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3002     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3003     for (j=0; j<m; j++) {
3004       procsnz[0] += ourlens[j];
3005     }
3006     for (i=1; i<size; i++) {
3007       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3008       /* calculate the number of nonzeros on each processor */
3009       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3010         procsnz[i] += rowlengths[j];
3011       }
3012       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3013     }
3014     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3015   } else {
3016     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3017   }
3018 
3019   if (!rank) {
3020     /* determine max buffer needed and allocate it */
3021     maxnz = 0;
3022     for (i=0; i<size; i++) {
3023       maxnz = PetscMax(maxnz,procsnz[i]);
3024     }
3025     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3026 
3027     /* read in my part of the matrix column indices  */
3028     nz   = procsnz[0];
3029     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3030     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3031 
3032     /* read in every other process's part and ship it off */
3033     for (i=1; i<size; i++) {
3034       nz     = procsnz[i];
3035       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3036       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3037     }
3038     ierr = PetscFree(cols);CHKERRQ(ierr);
3039   } else {
3040     /* determine buffer space needed for message */
3041     nz = 0;
3042     for (i=0; i<m; i++) {
3043       nz += ourlens[i];
3044     }
3045     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3046 
3047     /* receive message of column indices */
3048     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3049   }
3050 
3051   /* determine column ownership if matrix is not square */
3052   if (N != M) {
3053     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3054     else n = newMat->cmap->n;
3055     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3056     cstart = cend - n;
3057   } else {
3058     cstart = rstart;
3059     cend   = rend;
3060     n      = cend - cstart;
3061   }
3062 
3063   /* loop over local rows, determining number of off diagonal entries */
3064   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3065   jj = 0;
3066   for (i=0; i<m; i++) {
3067     for (j=0; j<ourlens[i]; j++) {
3068       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3069       jj++;
3070     }
3071   }
3072 
3073   for (i=0; i<m; i++) {
3074     ourlens[i] -= offlens[i];
3075   }
3076   if (!sizesset) {
3077     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3078   }
3079   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3080 
3081   for (i=0; i<m; i++) {
3082     ourlens[i] += offlens[i];
3083   }
3084 
3085   if (!rank) {
3086     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3087 
3088     /* read in my part of the matrix numerical values  */
3089     nz   = procsnz[0];
3090     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3091 
3092     /* insert into matrix */
3093     jj      = rstart;
3094     smycols = mycols;
3095     svals   = vals;
3096     for (i=0; i<m; i++) {
3097       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3098       smycols += ourlens[i];
3099       svals   += ourlens[i];
3100       jj++;
3101     }
3102 
3103     /* read in other processors and ship out */
3104     for (i=1; i<size; i++) {
3105       nz     = procsnz[i];
3106       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3107       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3108     }
3109     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3110   } else {
3111     /* receive numeric values */
3112     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3113 
3114     /* receive message of values */
3115     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3116 
3117     /* insert into matrix */
3118     jj      = rstart;
3119     smycols = mycols;
3120     svals   = vals;
3121     for (i=0; i<m; i++) {
3122       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3123       smycols += ourlens[i];
3124       svals   += ourlens[i];
3125       jj++;
3126     }
3127   }
3128   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3129   ierr = PetscFree(vals);CHKERRQ(ierr);
3130   ierr = PetscFree(mycols);CHKERRQ(ierr);
3131   ierr = PetscFree(rowners);CHKERRQ(ierr);
3132 
3133   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3134   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3135   PetscFunctionReturn(0);
3136 }
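
/*
   A minimal usage sketch (not part of this source): loading an MPIAIJ matrix
   from a PETSc binary file; the file name "matrix.dat" is a placeholder.

     Mat         A;
     PetscViewer fd;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,fd);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(fd);CHKERRQ(ierr);
*/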
3137 
3138 #undef __FUNCT__
3139 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3140 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3141 {
3142   PetscErrorCode ierr;
3143   IS             iscol_local;
3144   PetscInt       csize;
3145 
3146   PetscFunctionBegin;
3147   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3148   if (call == MAT_REUSE_MATRIX) {
3149     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3150     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3151   } else {
3152     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3153   }
3154   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3155   if (call == MAT_INITIAL_MATRIX) {
3156     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3157     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3158   }
3159   PetscFunctionReturn(0);
3160 }
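
/*
   A minimal usage sketch (not part of this source): extracting a parallel
   submatrix; rstart and rend are assumed to be the local ownership range of
   the assembled MPIAIJ matrix A.

     IS  isrow,iscol;
     Mat S;
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);

   The second call reuses S via the "ISAllGather" attribute composed above.
*/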
3161 
3162 #undef __FUNCT__
3163 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3164 /*
3165     Not great since it makes two copies of the submatrix: first a SeqAIJ
3166   locally, and then the end result by concatenating the local matrices.
3167   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3168 
3169   Note: This requires a sequential iscol with all indices.
3170 */
3171 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3172 {
3173   PetscErrorCode ierr;
3174   PetscMPIInt    rank,size;
3175   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3176   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3177   Mat            *local,M,Mreuse;
3178   MatScalar      *vwork,*aa;
3179   MPI_Comm       comm = ((PetscObject)mat)->comm;
3180   Mat_SeqAIJ     *aij;
3181 
3182 
3183   PetscFunctionBegin;
3184   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3185   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3186 
3187   if (call ==  MAT_REUSE_MATRIX) {
3188     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3189     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3190     local = &Mreuse;
3191     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3192   } else {
3193     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3194     Mreuse = *local;
3195     ierr   = PetscFree(local);CHKERRQ(ierr);
3196   }
3197 
3198   /*
3199       m - number of local rows
3200       n - number of columns (same on all processors)
3201       rstart - first row in new global matrix generated
3202   */
3203   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3204   if (call == MAT_INITIAL_MATRIX) {
3205     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3206     ii  = aij->i;
3207     jj  = aij->j;
3208 
3209     /*
3210         Determine the number of non-zeros in the diagonal and off-diagonal
3211         portions of the matrix in order to do correct preallocation
3212     */
3213 
3214     /* first get start and end of "diagonal" columns */
3215     if (csize == PETSC_DECIDE) {
3216       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3217       if (mglobal == n) { /* square matrix */
3218 	nlocal = m;
3219       } else {
3220         nlocal = n/size + ((n % size) > rank);
3221       }
3222     } else {
3223       nlocal = csize;
3224     }
3225     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3226     rstart = rend - nlocal;
3227     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3228 
3229     /* next, compute all the lengths */
3230     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3231     olens = dlens + m;
3232     for (i=0; i<m; i++) {
3233       jend = ii[i+1] - ii[i];
3234       olen = 0;
3235       dlen = 0;
3236       for (j=0; j<jend; j++) {
3237         if (*jj < rstart || *jj >= rend) olen++;
3238         else dlen++;
3239         jj++;
3240       }
3241       olens[i] = olen;
3242       dlens[i] = dlen;
3243     }
3244     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3245     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3246     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3247     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3248     ierr = PetscFree(dlens);CHKERRQ(ierr);
3249   } else {
3250     PetscInt ml,nl;
3251 
3252     M = *newmat;
3253     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3254     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3255     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3256     /*
3257          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3258        rather than the slower MatSetValues().
3259     */
3260     M->was_assembled = PETSC_TRUE;
3261     M->assembled     = PETSC_FALSE;
3262   }
3263   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3264   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3265   ii  = aij->i;
3266   jj  = aij->j;
3267   aa  = aij->a;
3268   for (i=0; i<m; i++) {
3269     row   = rstart + i;
3270     nz    = ii[i+1] - ii[i];
3271     cwork = jj;     jj += nz;
3272     vwork = aa;     aa += nz;
3273     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3274   }
3275 
3276   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3277   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3278   *newmat = M;
3279 
3280   /* save submatrix used in processor for next request */
3281   if (call ==  MAT_INITIAL_MATRIX) {
3282     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3283     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3284   }
3285 
3286   PetscFunctionReturn(0);
3287 }
3288 
3289 EXTERN_C_BEGIN
3290 #undef __FUNCT__
3291 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3292 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3293 {
3294   PetscInt       m,cstart, cend,j,nnz,i,d;
3295   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3296   const PetscInt *JJ;
3297   PetscScalar    *values;
3298   PetscErrorCode ierr;
3299 
3300   PetscFunctionBegin;
3301   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3302 
3303   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3304   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3305   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3306   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3307   m      = B->rmap->n;
3308   cstart = B->cmap->rstart;
3309   cend   = B->cmap->rend;
3310   rstart = B->rmap->rstart;
3311 
3312   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3313 
3314 #if defined(PETSC_USE_DEBUG)
3315   for (i=0; i<m; i++) {
3316     nnz     = Ii[i+1]- Ii[i];
3317     JJ      = J + Ii[i];
3318     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3319     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3320     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3321   }
3322 #endif
3323 
3324   for (i=0; i<m; i++) {
3325     nnz     = Ii[i+1]- Ii[i];
3326     JJ      = J + Ii[i];
3327     nnz_max = PetscMax(nnz_max,nnz);
3328     d       = 0;
3329     for (j=0; j<nnz; j++) {
3330       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3331     }
3332     d_nnz[i] = d;
3333     o_nnz[i] = nnz - d;
3334   }
3335   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3336   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3337 
3338   if (v) values = (PetscScalar*)v;
3339   else {
3340     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3341     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3342   }
3343 
3344   for (i=0; i<m; i++) {
3345     ii   = i + rstart;
3346     nnz  = Ii[i+1]- Ii[i];
3347     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3348   }
3349   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3350   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3351 
3352   if (!v) {
3353     ierr = PetscFree(values);CHKERRQ(ierr);
3354   }
3355   PetscFunctionReturn(0);
3356 }
3357 EXTERN_C_END
3358 
3359 #undef __FUNCT__
3360 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3361 /*@
3362    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3363    (the default parallel PETSc format).
3364 
3365    Collective on MPI_Comm
3366 
3367    Input Parameters:
3368 +  B - the matrix
3369 .  i - the indices into j for the start of each local row (starts with zero)
3370 .  j - the column indices for each local row (starts with zero)
3371 -  v - optional values in the matrix
3372 
3373    Level: developer
3374 
3375    Notes:
3376        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3377      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3378      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3379 
3380        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3381 
3382        The format used for the sparse matrix input is equivalent to a
3383     row-major ordering, i.e., for the following matrix the expected input data is
3384     as shown:
3385 
3386         1 0 0
3387         2 0 3     P0
3388        -------
3389         4 5 6     P1
3390 
3391      Process0 [P0]: rows_owned=[0,1]
3392         i =  {0,1,3}  [size = nrow+1  = 2+1]
3393         j =  {0,0,2}  [size = nz = 3]
3394         v =  {1,2,3}  [size = nz = 3]
3395 
3396      Process1 [P1]: rows_owned=[2]
3397         i =  {0,3}    [size = nrow+1  = 1+1]
3398         j =  {0,1,2}  [size = nz = 3]
3399         v =  {4,5,6}  [size = nz = 3]
3400 
3401 .keywords: matrix, aij, compressed row, sparse, parallel
3402 
3403 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3404           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3405 @*/
3406 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3407 {
3408   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3409 
3410   PetscFunctionBegin;
3411   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3412   if (f) {
3413     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3414   }
3415   PetscFunctionReturn(0);
3416 }
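
/*
   A minimal usage sketch (not part of this source): building the 3x3 example
   from the manual page above on two processes; rank is assumed to hold the
   MPI rank.

     Mat         B;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};  PetscScalar v0[] = {1,2,3};
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};  PetscScalar v1[] = {4,5,6};
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr);}
     else       {ierr = MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);CHKERRQ(ierr);}
*/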
3417 
3418 #undef __FUNCT__
3419 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3420 /*@C
3421    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3422    (the default parallel PETSc format).  For good matrix assembly performance
3423    the user should preallocate the matrix storage by setting the parameters
3424    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3425    performance can be increased by more than a factor of 50.
3426 
3427    Collective on MPI_Comm
3428 
3429    Input Parameters:
3430 +  A - the matrix
3431 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3432            (same value is used for all local rows)
3433 .  d_nnz - array containing the number of nonzeros in the various rows of the
3434            DIAGONAL portion of the local submatrix (possibly different for each row)
3435            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3436            The size of this array is equal to the number of local rows, i.e 'm'.
3437            You must leave room for the diagonal entry even if it is zero.
3438 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3439            submatrix (same value is used for all local rows).
3440 -  o_nnz - array containing the number of nonzeros in the various rows of the
3441            OFF-DIAGONAL portion of the local submatrix (possibly different for
3442            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3443            structure. The size of this array is equal to the number
3444            of local rows, i.e 'm'.
3445 
3446    If the *_nnz parameter is given then the *_nz parameter is ignored
3447 
3448    The AIJ format (also called the Yale sparse matrix format or
3449    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3450    storage.  The stored row and column indices begin with zero.
3451    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3452 
3453    The parallel matrix is partitioned such that the first m0 rows belong to
3454    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3455    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3456 
3457    The DIAGONAL portion of the local submatrix of a processor can be defined
3458    as the submatrix which is obtained by extracting the part corresponding to
3459    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3460    first row that belongs to the processor, r2 is the last row belonging to
3461    this processor, and c1-c2 is the range of indices of the local part of a
3462    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3463    common case of a square matrix, the row and column ranges are the same and
3464    the DIAGONAL part is also square. The remaining portion of the local
3465    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3466 
3467    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3468 
3469    You can call MatGetInfo() to get information on how effective the preallocation was;
3470    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3471    You can also run with the option -info and look for messages with the string
3472    malloc in them to see if additional memory allocation was needed.
3473 
3474    Example usage:
3475 
3476    Consider the following 8x8 matrix with 34 non-zero values, that is
3477    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3478    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3479    as follows:
3480 
3481 .vb
3482             1  2  0  |  0  3  0  |  0  4
3483     Proc0   0  5  6  |  7  0  0  |  8  0
3484             9  0 10  | 11  0  0  | 12  0
3485     -------------------------------------
3486            13  0 14  | 15 16 17  |  0  0
3487     Proc1   0 18  0  | 19 20 21  |  0  0
3488             0  0  0  | 22 23  0  | 24  0
3489     -------------------------------------
3490     Proc2  25 26 27  |  0  0 28  | 29  0
3491            30  0  0  | 31 32 33  |  0 34
3492 .ve
3493 
3494    This can be represented as a collection of submatrices as:
3495 
3496 .vb
3497       A B C
3498       D E F
3499       G H I
3500 .ve
3501 
3502    Where the submatrices A,B,C are owned by proc0, D,E,F are
3503    owned by proc1, G,H,I are owned by proc2.
3504 
3505    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3506    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3507    The 'M','N' parameters are 8,8, and have the same values on all procs.
3508 
3509    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3510    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3511    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3512    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3513    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3514    matrix, and [DF] as another SeqAIJ matrix.
3515 
3516    When d_nz, o_nz parameters are specified, d_nz storage elements are
3517    allocated for every row of the local diagonal submatrix, and o_nz
3518    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3519    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3520    per local row in the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3521    In this case, the values of d_nz,o_nz are:
3522 .vb
3523      proc0 : dnz = 2, o_nz = 2
3524      proc1 : dnz = 3, o_nz = 2
3525      proc2 : dnz = 1, o_nz = 4
3526 .ve
3527    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3528    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3529    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3530    34 values.
3531 
3532    When d_nnz, o_nnz parameters are specified, the storage is specified
3533    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3534    In the above case the values for d_nnz,o_nnz are:
3535 .vb
3536      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3537      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3538      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3539 .ve
3540    Here the space allocated is the sum of all the above values, i.e. 34, and
3541    hence the preallocation is perfect.
3542 
3543    Level: intermediate
3544 
3545 .keywords: matrix, aij, compressed row, sparse, parallel
3546 
3547 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3548           MPIAIJ, MatGetInfo()
3549 @*/
3550 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3551 {
3552   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3553 
3554   PetscFunctionBegin;
3555   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3556   if (f) {
3557     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3558   }
3559   PetscFunctionReturn(0);
3560 }
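
/*
   A minimal usage sketch (not part of this source): the per-row preallocation
   that proc1 of the 8x8 example above would supply (local rows 3..5).

     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/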
3561 
3562 #undef __FUNCT__
3563 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3564 /*@
3565      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3566          CSR format the local rows.
3567 
3568    Collective on MPI_Comm
3569 
3570    Input Parameters:
3571 +  comm - MPI communicator
3572 .  m - number of local rows (Cannot be PETSC_DECIDE)
3573 .  n - This value should be the same as the local size used in creating the
3574        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3575        calculated if N is given) For square matrices n is almost always m.
3576 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3577 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3578 .   i - row indices
3579 .   j - column indices
3580 -   a - matrix values
3581 
3582    Output Parameter:
3583 .   mat - the matrix
3584 
3585    Level: intermediate
3586 
3587    Notes:
3588        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3589      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3590      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3591 
3592        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3593 
3594        The format used for the sparse matrix input is equivalent to a
3595     row-major ordering, i.e., for the following matrix the expected input data is
3596     as shown:
3597 
3598         1 0 0
3599         2 0 3     P0
3600        -------
3601         4 5 6     P1
3602 
3603      Process0 [P0]: rows_owned=[0,1]
3604         i =  {0,1,3}  [size = nrow+1  = 2+1]
3605         j =  {0,0,2}  [size = nz = 3]
3606         v =  {1,2,3}  [size = nz = 3]
3607 
3608      Process1 [P1]: rows_owned=[2]
3609         i =  {0,3}    [size = nrow+1  = 1+1]
3610         j =  {0,1,2}  [size = nz = 3]
3611         v =  {4,5,6}  [size = nz = 3]
3612 
3613 .keywords: matrix, aij, compressed row, sparse, parallel
3614 
3615 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3616           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3617 @*/
3618 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3619 {
3620   PetscErrorCode ierr;
3621 
3622   PetscFunctionBegin;
3623   if (i[0]) {
3624     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3625   }
3626   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3627   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3628   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3629   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3630   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3631   PetscFunctionReturn(0);
3632 }
3633 
3634 #undef __FUNCT__
3635 #define __FUNCT__ "MatCreateMPIAIJ"
3636 /*@C
3637    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3638    (the default parallel PETSc format).  For good matrix assembly performance
3639    the user should preallocate the matrix storage by setting the parameters
3640    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3641    performance can be increased by more than a factor of 50.
3642 
3643    Collective on MPI_Comm
3644 
3645    Input Parameters:
3646 +  comm - MPI communicator
3647 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3648            This value should be the same as the local size used in creating the
3649            y vector for the matrix-vector product y = Ax.
3650 .  n - This value should be the same as the local size used in creating the
3651        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3652        calculated if N is given) For square matrices n is almost always m.
3653 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3654 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3655 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3656            (same value is used for all local rows)
3657 .  d_nnz - array containing the number of nonzeros in the various rows of the
3658            DIAGONAL portion of the local submatrix (possibly different for each row)
3659            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3660            The size of this array is equal to the number of local rows, i.e 'm'.
3661            You must leave room for the diagonal entry even if it is zero.
3662 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3663            submatrix (same value is used for all local rows).
3664 -  o_nnz - array containing the number of nonzeros in the various rows of the
3665            OFF-DIAGONAL portion of the local submatrix (possibly different for
3666            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3667            structure. The size of this array is equal to the number
3668            of local rows, i.e 'm'.
3669 
3670    Output Parameter:
3671 .  A - the matrix
3672 
3673    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3674    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3675    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3676 
3677    Notes:
3678    If the *_nnz parameter is given then the *_nz parameter is ignored
3679 
3680    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3681    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3682    storage requirements for this matrix.
3683 
3684    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3685    processor then it must be used on all processors that share the object for
3686    that argument.
3687 
3688    The user MUST specify either the local or global matrix dimensions
3689    (possibly both).
3690 
3691    The parallel matrix is partitioned across processors such that the
3692    first m0 rows belong to process 0, the next m1 rows belong to
3693    process 1, the next m2 rows belong to process 2 etc.. where
3694    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3695    values corresponding to [m x N] submatrix.
3696 
3697    The columns are logically partitioned with the n0 columns belonging
3698    to 0th partition, the next n1 columns belonging to the next
3699    partition etc., where n0,n1,n2... are the input parameter 'n'.
3700 
3701    The DIAGONAL portion of the local submatrix on any given processor
3702    is the submatrix corresponding to the rows and columns m,n
3703    corresponding to the given processor. i.e diagonal matrix on
3704    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3705    etc. The remaining portion of the local submatrix [m x (N-n)]
3706    constitute the OFF-DIAGONAL portion. The example below better
3707    illustrates this concept.
3708 
3709    For a square global matrix we define each processor's diagonal portion
3710    to be its local rows and the corresponding columns (a square submatrix);
3711    each processor's off-diagonal portion encompasses the remainder of the
3712    local matrix (a rectangular submatrix).
3713 
3714    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3715 
3716    When calling this routine with a single process communicator, a matrix of
3717    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3718    type of communicator, use the construction mechanism:
3719      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3720 
3721    By default, this format uses inodes (identical nodes) when possible.
3722    We search for consecutive rows with the same nonzero structure, thereby
3723    reusing matrix information to achieve increased efficiency.
3724 
3725    Options Database Keys:
3726 +  -mat_no_inode  - Do not use inodes
3727 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3728 -  -mat_aij_oneindex - Internally use indexing starting at 1
3729         rather than 0.  Note that when calling MatSetValues(),
3730         the user still MUST index entries starting at 0!
3731 
3732 
3733    Example usage:
3734 
3735    Consider the following 8x8 matrix with 34 non-zero values, that is
3736    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3737    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3738    as follows:
3739 
3740 .vb
3741             1  2  0  |  0  3  0  |  0  4
3742     Proc0   0  5  6  |  7  0  0  |  8  0
3743             9  0 10  | 11  0  0  | 12  0
3744     -------------------------------------
3745            13  0 14  | 15 16 17  |  0  0
3746     Proc1   0 18  0  | 19 20 21  |  0  0
3747             0  0  0  | 22 23  0  | 24  0
3748     -------------------------------------
3749     Proc2  25 26 27  |  0  0 28  | 29  0
3750            30  0  0  | 31 32 33  |  0 34
3751 .ve
3752 
3753    This can be represented as a collection of submatrices as:
3754 
3755 .vb
3756       A B C
3757       D E F
3758       G H I
3759 .ve
3760 
3761    Where the submatrices A,B,C are owned by proc0, D,E,F are
3762    owned by proc1, G,H,I are owned by proc2.
3763 
3764    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3765    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3766    The 'M','N' parameters are 8,8, and have the same values on all procs.
3767 
3768    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3769    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3770    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3771    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3772    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3773    matrix, and [DF] as another SeqAIJ matrix.
3774 
3775    When d_nz, o_nz parameters are specified, d_nz storage elements are
3776    allocated for every row of the local diagonal submatrix, and o_nz
3777    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3778    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3779    per local row in the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3780    In this case, the values of d_nz,o_nz are:
3781 .vb
3782      proc0 : dnz = 2, o_nz = 2
3783      proc1 : dnz = 3, o_nz = 2
3784      proc2 : dnz = 1, o_nz = 4
3785 .ve
3786    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3787    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3788    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3789    34 values.
3790 
3791    When d_nnz, o_nnz parameters are specified, the storage is specified
3792    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3793    In the above case the values for d_nnz,o_nnz are:
3794 .vb
3795      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3796      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3797      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3798 .ve
3799    Here the space allocated is the sum of all the above values, i.e. 34, and
3800    hence the preallocation is perfect.
3801 
3802    Level: intermediate
3803 
3804 .keywords: matrix, aij, compressed row, sparse, parallel
3805 
3806 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3807           MPIAIJ, MatCreateMPIAIJWithArrays()
3808 @*/
3809 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3810 {
3811   PetscErrorCode ierr;
3812   PetscMPIInt    size;
3813 
3814   PetscFunctionBegin;
3815   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3816   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3817   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3818   if (size > 1) {
3819     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3820     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3821   } else {
3822     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3823     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3824   }
3825   PetscFunctionReturn(0);
3826 }
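
/*
   A minimal usage sketch (not part of this source): one-call creation with a
   uniform estimate of 5 diagonal-block and 2 off-diagonal-block nonzeros per
   local row (the sizes and estimates here are placeholders).

     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                            5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/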
3827 
3828 #undef __FUNCT__
3829 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3830 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3831 {
3832   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3833 
3834   PetscFunctionBegin;
3835   *Ad     = a->A;
3836   *Ao     = a->B;
3837   *colmap = a->garray;
3838   PetscFunctionReturn(0);
3839 }
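
/*
   A minimal usage sketch (not part of this source): accessing the two
   sequential blocks of an assembled MPIAIJ matrix A. The returned matrices
   are references owned by A and must not be destroyed by the caller.

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);

   Here colmap[i] is the global column index of local column i of Ao.
*/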
3840 
3841 #undef __FUNCT__
3842 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3843 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3844 {
3845   PetscErrorCode ierr;
3846   PetscInt       i;
3847   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3848 
3849   PetscFunctionBegin;
3850   if (coloring->ctype == IS_COLORING_GLOBAL) {
3851     ISColoringValue *allcolors,*colors;
3852     ISColoring      ocoloring;
3853 
3854     /* set coloring for diagonal portion */
3855     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3856 
3857     /* set coloring for off-diagonal portion */
3858     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3859     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3860     for (i=0; i<a->B->cmap->n; i++) {
3861       colors[i] = allcolors[a->garray[i]];
3862     }
3863     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3864     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3865     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3866     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3867   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3868     ISColoringValue *colors;
3869     PetscInt        *larray;
3870     ISColoring      ocoloring;
3871 
3872     /* set coloring for diagonal portion */
3873     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3874     for (i=0; i<a->A->cmap->n; i++) {
3875       larray[i] = i + A->cmap->rstart;
3876     }
3877     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3878     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3879     for (i=0; i<a->A->cmap->n; i++) {
3880       colors[i] = coloring->colors[larray[i]];
3881     }
3882     ierr = PetscFree(larray);CHKERRQ(ierr);
3883     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3884     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3885     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3886 
3887     /* set coloring for off-diagonal portion */
3888     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3889     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3890     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3891     for (i=0; i<a->B->cmap->n; i++) {
3892       colors[i] = coloring->colors[larray[i]];
3893     }
3894     ierr = PetscFree(larray);CHKERRQ(ierr);
3895     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3896     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3897     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3898   } else {
3899     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3900   }
3901 
3902   PetscFunctionReturn(0);
3903 }
3904 
3905 #if defined(PETSC_HAVE_ADIC)
3906 #undef __FUNCT__
3907 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3908 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3909 {
3910   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3911   PetscErrorCode ierr;
3912 
3913   PetscFunctionBegin;
3914   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3915   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3916   PetscFunctionReturn(0);
3917 }
3918 #endif
3919 
3920 #undef __FUNCT__
3921 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3922 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3923 {
3924   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3925   PetscErrorCode ierr;
3926 
3927   PetscFunctionBegin;
3928   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3929   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3930   PetscFunctionReturn(0);
3931 }
3932 
3933 #undef __FUNCT__
3934 #define __FUNCT__ "MatMerge"
3935 /*@
3936       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
3937                  matrices from each processor
3938 
3939     Collective on MPI_Comm
3940 
3941    Input Parameters:
3942 +    comm - the communicator the parallel matrix will live on
3943 .    inmat - the input sequential matrix (one per process)
3944 .    n - number of local columns (or PETSC_DECIDE)
3945 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3946 
3947    Output Parameter:
3948 .    outmat - the parallel matrix generated
3949 
3950     Level: advanced
3951 
3952    Notes: The number of columns of the matrix on EACH processor MUST be the same; the input matrix is destroyed by this routine.
3953 
3954 @*/
3955 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3956 {
3957   PetscErrorCode ierr;
3958   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3959   PetscInt       *indx;
3960   PetscScalar    *values;
3961 
3962   PetscFunctionBegin;
3963   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3964   if (scall == MAT_INITIAL_MATRIX){
3965     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3966     if (n == PETSC_DECIDE){
3967       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3968     }
3969     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3970     rstart -= m;
3971 
3972     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3973     for (i=0;i<m;i++) {
3974       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3975       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3976       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3977     }
3978     /* This routine will ONLY return MPIAIJ type matrix */
3979     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3980     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3981     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3982     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3983     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3984 
3985   } else if (scall == MAT_REUSE_MATRIX){
3986     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3987   } else {
3988     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3989   }
3990 
3991   for (i=0;i<m;i++) {
3992     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3993     Ii    = i + rstart;
3994     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3995     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3996   }
3997   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3998   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3999   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4000 
4001   PetscFunctionReturn(0);
4002 }
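
/*
   Example usage (a minimal sketch, assuming each process has already built and
   assembled a SeqAIJ matrix "local" with the same number of columns; note that
   MatMerge() destroys the input matrix):

     Mat local,global;
     ...
     ierr = MatMerge(PETSC_COMM_WORLD,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&global);CHKERRQ(ierr);
     ...                                  (local has been destroyed; use global from here on)
     ierr = MatDestroy(global);CHKERRQ(ierr);
*/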
4003 
4004 #undef __FUNCT__
4005 #define __FUNCT__ "MatFileSplit"
4006 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4007 {
4008   PetscErrorCode    ierr;
4009   PetscMPIInt       rank;
4010   PetscInt          m,N,i,rstart,nnz;
4011   size_t            len;
4012   const PetscInt    *indx;
4013   PetscViewer       out;
4014   char              *name;
4015   Mat               B;
4016   const PetscScalar *values;
4017 
4018   PetscFunctionBegin;
4019   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4020   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4021   /* Should this be the type of the diagonal block of A? */
4022   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4023   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4024   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4025   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4026   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4027   for (i=0;i<m;i++) {
4028     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4029     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4030     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4031   }
4032   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4033   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4034 
4035   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4036   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4037   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for "." plus the rank digits */
4038   sprintf(name,"%s.%d",outfile,rank);
4039   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4040   ierr = PetscFree(name);CHKERRQ(ierr);
4041   ierr = MatView(B,out);CHKERRQ(ierr);
4042   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4043   ierr = MatDestroy(B);CHKERRQ(ierr);
4044   PetscFunctionReturn(0);
4045 }
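
/*
   Example usage (a sketch): dump each process's local rows of a parallel matrix A
   into its own sequential binary file A.split.0, A.split.1, ...:

     ierr = MatFileSplit(A,"A.split");CHKERRQ(ierr);
*/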
4046 
4047 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4048 #undef __FUNCT__
4049 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4050 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4051 {
4052   PetscErrorCode       ierr;
4053   Mat_Merge_SeqsToMPI  *merge;
4054   PetscContainer       container;
4055 
4056   PetscFunctionBegin;
4057   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4058   if (container) {
4059     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4060     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4061     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4062     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4063     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4064     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4065     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4066     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4067     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4068     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4069     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4070     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4071     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4072     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4073 
4074     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4075     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4076     ierr = PetscFree(merge);CHKERRQ(ierr);
4077   }
4078 
4079   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4080   PetscFunctionReturn(0);
4081 }
4082 
4083 #include "../src/mat/utils/freespace.h"
4084 #include "petscbt.h"
4085 
4086 #undef __FUNCT__
4087 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4088 /*@C
4089       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
4090                  matrices from each process
4091 
4092     Collective on MPI_Comm
4093 
4094    Input Parameters:
4095 +    comm - the communicator the parallel matrix will live on
4096 .    seqmat - the input sequential matrix (one per process)
4097 .    m - number of local rows (or PETSC_DECIDE)
4098 .    n - number of local columns (or PETSC_DECIDE)
4099 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4100 
4101    Output Parameter:
4102 .    mpimat - the parallel matrix generated
4103 
4104     Level: advanced
4105 
4106    Notes:
4107      The dimensions of the sequential matrix on each process MUST be the same.
4108      The supporting data structure is attached to mpimat in a PetscContainer named
4109      "MatMergeSeqsToMPI"; it is freed when mpimat is destroyed and can be accessed with PetscObjectQuery().
4110 @*/
4111 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4112 {
4113   PetscErrorCode       ierr;
4114   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4115   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4116   PetscMPIInt          size,rank,taga,*len_s;
4117   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4118   PetscInt             proc,m;
4119   PetscInt             **buf_ri,**buf_rj;
4120   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4121   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4122   MPI_Request          *s_waits,*r_waits;
4123   MPI_Status           *status;
4124   MatScalar            *aa=a->a;
4125   MatScalar            **abuf_r,*ba_i;
4126   Mat_Merge_SeqsToMPI  *merge;
4127   PetscContainer       container;
4128 
4129   PetscFunctionBegin;
4130   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4131 
4132   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4133   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4134 
4135   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4136   if (container) {
4137     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4138   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing the MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
4139   bi     = merge->bi;
4140   bj     = merge->bj;
4141   buf_ri = merge->buf_ri;
4142   buf_rj = merge->buf_rj;
4143 
4144   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4145   owners = merge->rowmap->range;
4146   len_s  = merge->len_s;
4147 
4148   /* send and recv matrix values */
4149   /*-----------------------------*/
4150   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4151   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4152 
4153   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4154   for (proc=0,k=0; proc<size; proc++){
4155     if (!len_s[proc]) continue;
4156     i = owners[proc];
4157     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4158     k++;
4159   }
4160 
4161   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4162   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4163   ierr = PetscFree(status);CHKERRQ(ierr);
4164 
4165   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4166   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4167 
4168   /* insert mat values of mpimat */
4169   /*----------------------------*/
4170   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4171   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4172 
4173   for (k=0; k<merge->nrecv; k++){
4174     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4175     nrows = *(buf_ri_k[k]);
4176     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4177     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4178   }
4179 
4180   /* set values of ba */
4181   m = merge->rowmap->n;
4182   for (i=0; i<m; i++) {
4183     arow = owners[rank] + i;
4184     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4185     bnzi = bi[i+1] - bi[i];
4186     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4187 
4188     /* add local non-zero vals of this proc's seqmat into ba */
4189     anzi = ai[arow+1] - ai[arow];
4190     aj   = a->j + ai[arow];
4191     aa   = a->a + ai[arow];
4192     nextaj = 0;
4193     for (j=0; nextaj<anzi; j++){
4194       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4195         ba_i[j] += aa[nextaj++];
4196       }
4197     }
4198 
4199     /* add received vals into ba */
4200     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4201       /* i-th row */
4202       if (i == *nextrow[k]) {
4203         anzi = *(nextai[k]+1) - *nextai[k];
4204         aj   = buf_rj[k] + *(nextai[k]);
4205         aa   = abuf_r[k] + *(nextai[k]);
4206         nextaj = 0;
4207         for (j=0; nextaj<anzi; j++){
4208           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4209             ba_i[j] += aa[nextaj++];
4210           }
4211         }
4212         nextrow[k]++; nextai[k]++;
4213       }
4214     }
4215     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4216   }
4217   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4218   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4219 
4220   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4221   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4222   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4223   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4224   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4225   PetscFunctionReturn(0);
4226 }
4227 
4228 #undef __FUNCT__
4229 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4230 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4231 {
4232   PetscErrorCode       ierr;
4233   Mat                  B_mpi;
4234   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4235   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4236   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4237   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4238   PetscInt             len,proc,*dnz,*onz;
4239   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4240   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4241   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4242   MPI_Status           *status;
4243   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4244   PetscBT              lnkbt;
4245   Mat_Merge_SeqsToMPI  *merge;
4246   PetscContainer       container;
4247 
4248   PetscFunctionBegin;
4249   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4250 
4251   /* make sure it is a PETSc comm */
4252   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4254   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4255 
4256   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4257   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4258 
4259   /* determine row ownership */
4260   /*---------------------------------------------------------*/
4261   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4262   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4263   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4264   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4265   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4266   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4267   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4268 
4269   m      = merge->rowmap->n;
4270   M      = merge->rowmap->N;
4271   owners = merge->rowmap->range;
4272 
4273   /* determine the number of messages to send, their lengths */
4274   /*---------------------------------------------------------*/
4275   len_s  = merge->len_s;
4276 
4277   len = 0;  /* length of buf_si[] */
4278   merge->nsend = 0;
4279   for (proc=0; proc<size; proc++){
4280     len_si[proc] = 0;
4281     if (proc == rank){
4282       len_s[proc] = 0;
4283     } else {
4284       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4285       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4286     }
4287     if (len_s[proc]) {
4288       merge->nsend++;
4289       nrows = 0;
4290       for (i=owners[proc]; i<owners[proc+1]; i++){
4291         if (ai[i+1] > ai[i]) nrows++;
4292       }
4293       len_si[proc] = 2*(nrows+1);
4294       len += len_si[proc];
4295     }
4296   }
4297 
4298   /* determine the number and length of messages to receive for ij-structure */
4299   /*-------------------------------------------------------------------------*/
4300   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4301   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4302 
4303   /* post the Irecv of j-structure */
4304   /*-------------------------------*/
4305   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4306   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4307 
4308   /* post the Isend of j-structure */
4309   /*--------------------------------*/
4310   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4311 
4312   for (proc=0, k=0; proc<size; proc++){
4313     if (!len_s[proc]) continue;
4314     i = owners[proc];
4315     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4316     k++;
4317   }
4318 
4319   /* receives and sends of j-structure are complete */
4320   /*------------------------------------------------*/
4321   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4322   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4323 
4324   /* send and recv i-structure */
4325   /*---------------------------*/
4326   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4327   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4328 
4329   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4330   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4331   for (proc=0,k=0; proc<size; proc++){
4332     if (!len_s[proc]) continue;
4333     /* form outgoing message for i-structure:
4334          buf_si[0]:                 nrows to be sent
4335                [1:nrows]:           row index (global)
4336                [nrows+1:2*nrows+1]: i-structure index
4337     */
4338     /*-------------------------------------------*/
4339     nrows = len_si[proc]/2 - 1;
4340     buf_si_i    = buf_si + nrows+1;
4341     buf_si[0]   = nrows;
4342     buf_si_i[0] = 0;
4343     nrows = 0;
4344     for (i=owners[proc]; i<owners[proc+1]; i++){
4345       anzi = ai[i+1] - ai[i];
4346       if (anzi) {
4347         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4348         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4349         nrows++;
4350       }
4351     }
4352     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4353     k++;
4354     buf_si += len_si[proc];
4355   }
4356 
4357   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4358   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4359 
4360   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4361   for (i=0; i<merge->nrecv; i++){
4362     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4363   }
4364 
4365   ierr = PetscFree(len_si);CHKERRQ(ierr);
4366   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4367   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4368   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4369   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4370   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4371   ierr = PetscFree(status);CHKERRQ(ierr);
4372 
4373   /* compute a local seq matrix in each processor */
4374   /*----------------------------------------------*/
4375   /* allocate bi array and free space for accumulating nonzero column info */
4376   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4377   bi[0] = 0;
4378 
4379   /* create and initialize a linked list */
4380   nlnk = N+1;
4381   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4382 
4383   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4384   len  = ai[owners[rank+1]] - ai[owners[rank]];
4386   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4387   current_space = free_space;
4388 
4389   /* determine symbolic info for each local row */
4390   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4391 
4392   for (k=0; k<merge->nrecv; k++){
4393     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4394     nrows = *buf_ri_k[k];
4395     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4396     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4397   }
4398 
4399   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4400   len = 0;
4401   for (i=0;i<m;i++) {
4402     bnzi   = 0;
4403     /* add local non-zero cols of this proc's seqmat into lnk */
4404     arow   = owners[rank] + i;
4405     anzi   = ai[arow+1] - ai[arow];
4406     aj     = a->j + ai[arow];
4407     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4408     bnzi += nlnk;
4409     /* add received col data into lnk */
4410     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4411       if (i == *nextrow[k]) { /* i-th row */
4412         anzi = *(nextai[k]+1) - *nextai[k];
4413         aj   = buf_rj[k] + *nextai[k];
4414         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4415         bnzi += nlnk;
4416         nextrow[k]++; nextai[k]++;
4417       }
4418     }
4419     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4420 
4421     /* if free space is not available, make more free space */
4422     if (current_space->local_remaining<bnzi) {
4423       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4424       nspacedouble++;
4425     }
4426     /* copy data into free space, then initialize lnk */
4427     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4428     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4429 
4430     current_space->array           += bnzi;
4431     current_space->local_used      += bnzi;
4432     current_space->local_remaining -= bnzi;
4433 
4434     bi[i+1] = bi[i] + bnzi;
4435   }
4436 
4437   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4438 
4439   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4440   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4441   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4442 
4443   /* create symbolic parallel matrix B_mpi */
4444   /*---------------------------------------*/
4445   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4446   if (n==PETSC_DECIDE) {
4447     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4448   } else {
4449     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4450   }
4451   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4452   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4453   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4454 
4455   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4456   B_mpi->assembled     = PETSC_FALSE;
4457   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4458   merge->bi            = bi;
4459   merge->bj            = bj;
4460   merge->buf_ri        = buf_ri;
4461   merge->buf_rj        = buf_rj;
4462   merge->coi           = PETSC_NULL;
4463   merge->coj           = PETSC_NULL;
4464   merge->owners_co     = PETSC_NULL;
4465 
4466   /* attach the supporting struct to B_mpi for reuse */
4467   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4468   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4469   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4470   *mpimat = B_mpi;
4471 
4472   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4473   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4474   PetscFunctionReturn(0);
4475 }
4476 
4477 #undef __FUNCT__
4478 #define __FUNCT__ "MatMerge_SeqsToMPI"
4479 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4480 {
4481   PetscErrorCode   ierr;
4482 
4483   PetscFunctionBegin;
4484   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4485   if (scall == MAT_INITIAL_MATRIX){
4486     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4487   }
4488   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4489   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4490   PetscFunctionReturn(0);
4491 }
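
/*
   Example usage (a sketch; "comm" is the communicator the parallel matrix will
   live on): form a parallel matrix from per-process sequential contributions,
   then update its values cheaply when only the numerical entries of seqmat
   change (same nonzero pattern):

     Mat seqmat,mpimat;
     ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ...                                  (modify the values of seqmat)
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
*/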
4492 
4493 #undef __FUNCT__
4494 #define __FUNCT__ "MatGetLocalMat"
4495 /*@
4496      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of a parallel AIJ matrix
4497 
4498     Not Collective
4499 
4500    Input Parameters:
4501 +    A - the matrix
4502 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4503 
4504    Output Parameter:
4505 .    A_loc - the local sequential matrix generated
4506 
4507     Level: developer
4508 
4509 @*/
4510 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4511 {
4512   PetscErrorCode  ierr;
4513   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4514   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4515   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4516   MatScalar       *aa=a->a,*ba=b->a,*cam;
4517   PetscScalar     *ca;
4518   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4519   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4520 
4521   PetscFunctionBegin;
4522   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4523   if (scall == MAT_INITIAL_MATRIX){
4524     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4525     ci[0] = 0;
4526     for (i=0; i<am; i++){
4527       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4528     }
4529     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4530     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4531     k = 0;
4532     for (i=0; i<am; i++) {
4533       ncols_o = bi[i+1] - bi[i];
4534       ncols_d = ai[i+1] - ai[i];
4535       /* off-diagonal portion of A */
4536       for (jo=0; jo<ncols_o; jo++) {
4537         col = cmap[*bj];
4538         if (col >= cstart) break;
4539         cj[k]   = col; bj++;
4540         ca[k++] = *ba++;
4541       }
4542       /* diagonal portion of A */
4543       for (j=0; j<ncols_d; j++) {
4544         cj[k]   = cstart + *aj++;
4545         ca[k++] = *aa++;
4546       }
4547       /* off-diagonal portion of A */
4548       for (j=jo; j<ncols_o; j++) {
4549         cj[k]   = cmap[*bj++];
4550         ca[k++] = *ba++;
4551       }
4552     }
4553     /* put together the new matrix */
4554     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4555     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4556     /* Since these are PETSc arrays, change flags to free them as necessary. */
4557     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4558     mat->free_a  = PETSC_TRUE;
4559     mat->free_ij = PETSC_TRUE;
4560     mat->nonew   = 0;
4561   } else if (scall == MAT_REUSE_MATRIX){
4562     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4563     ci = mat->i; cj = mat->j; cam = mat->a;
4564     for (i=0; i<am; i++) {
4565       /* off-diagonal portion of A */
4566       ncols_o = bi[i+1] - bi[i];
4567       for (jo=0; jo<ncols_o; jo++) {
4568         col = cmap[*bj];
4569         if (col >= cstart) break;
4570         *cam++ = *ba++; bj++;
4571       }
4572       /* diagonal portion of A */
4573       ncols_d = ai[i+1] - ai[i];
4574       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4575       /* off-diagonal portion of A */
4576       for (j=jo; j<ncols_o; j++) {
4577         *cam++ = *ba++; bj++;
4578       }
4579     }
4580   } else {
4581     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4582   }
4583 
4584   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4585   PetscFunctionReturn(0);
4586 }
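
/*
   Example usage (a sketch): extract this process's rows of a parallel AIJ matrix
   A as one sequential matrix, and refresh only the numerical values later:

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ...                                  (values of A change, same nonzero pattern)
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/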
4587 
4588 #undef __FUNCT__
4589 #define __FUNCT__ "MatGetLocalMatCondensed"
4590 /*@C
4591      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and the NON-ZERO columns of a parallel AIJ matrix
4592 
4593     Not Collective
4594 
4595    Input Parameters:
4596 +    A - the matrix
4597 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4598 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4599 
4600    Output Parameter:
4601 .    A_loc - the local sequential matrix generated
4602 
4603     Level: developer
4604 
4605 @*/
4606 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4607 {
4608   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4609   PetscErrorCode    ierr;
4610   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4611   IS                isrowa,iscola;
4612   Mat               *aloc;
4613 
4614   PetscFunctionBegin;
4615   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4616   if (!row){
4617     start = A->rmap->rstart; end = A->rmap->rend;
4618     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4619   } else {
4620     isrowa = *row;
4621   }
4622   if (!col){
4623     start = A->cmap->rstart;
4624     cmap  = a->garray;
4625     nzA   = a->A->cmap->n;
4626     nzB   = a->B->cmap->n;
4627     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4628     ncols = 0;
4629     for (i=0; i<nzB; i++) {
4630       if (cmap[i] < start) idx[ncols++] = cmap[i];
4631       else break;
4632     }
4633     imark = i;
4634     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4635     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4636     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4637     ierr = PetscFree(idx);CHKERRQ(ierr);
4638   } else {
4639     iscola = *col;
4640   }
4641   if (scall != MAT_INITIAL_MATRIX){
4642     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4643     aloc[0] = *A_loc;
4644   }
4645   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4646   *A_loc = aloc[0];
4647   ierr = PetscFree(aloc);CHKERRQ(ierr);
4648   if (!row){
4649     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4650   }
4651   if (!col){
4652     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4653   }
4654   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4655   PetscFunctionReturn(0);
4656 }
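
/*
   Example usage (a sketch; passing PETSC_NULL for row and col selects all local
   rows and all columns in which the local part of A has a nonzero):

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/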
4657 
4658 #undef __FUNCT__
4659 #define __FUNCT__ "MatGetBrowsOfAcols"
4660 /*@C
4661     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4662 
4663     Collective on Mat
4664 
4665    Input Parameters:
4666 +    A,B - the matrices in mpiaij format
4667 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4668 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4669 
4670    Output Parameter:
4671 +    rowb, colb - index sets of rows and columns of B to extract
4672 .    brstart - row index of B_seq at which B's local rows begin (the next B->rmap->n rows of B_seq are taken from B's local rows)
4673 -    B_seq - the sequential matrix generated
4674 
4675     Level: developer
4676 
4677 @*/
4678 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4679 {
4680   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4681   PetscErrorCode    ierr;
4682   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4683   IS                isrowb,iscolb;
4684   Mat               *bseq;
4685 
4686   PetscFunctionBegin;
4687   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4688     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4689   }
4690   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4691 
4692   if (scall == MAT_INITIAL_MATRIX){
4693     start = A->cmap->rstart;
4694     cmap  = a->garray;
4695     nzA   = a->A->cmap->n;
4696     nzB   = a->B->cmap->n;
4697     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4698     ncols = 0;
4699     for (i=0; i<nzB; i++) {  /* global rows of B below this process's row range */
4700       if (cmap[i] < start) idx[ncols++] = cmap[i];
4701       else break;
4702     }
4703     imark = i;
4704     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4705     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows of B above this process's row range */
4706     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4707     ierr = PetscFree(idx);CHKERRQ(ierr);
4708     *brstart = imark;
4709     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4710   } else {
4711     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4712     isrowb = *rowb; iscolb = *colb;
4713     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4714     bseq[0] = *B_seq;
4715   }
4716   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4717   *B_seq = bseq[0];
4718   ierr = PetscFree(bseq);CHKERRQ(ierr);
4719   if (!rowb){
4720     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4721   } else {
4722     *rowb = isrowb;
4723   }
4724   if (!colb){
4725     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4726   } else {
4727     *colb = iscolb;
4728   }
4729   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4730   PetscFunctionReturn(0);
4731 }
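
/*
   Example usage (a sketch, as in a product C = A*B where each process gathers
   the rows of B it needs; the index sets are returned so a later call can reuse
   the same pattern):

     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ...                                  (values of B change, same nonzero pattern)
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/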
4732 
4733 #undef __FUNCT__
4734 #define __FUNCT__ "MatGetBrowsOfAoCols"
4735 /*@C
4736     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4737     of the OFF-DIAGONAL portion of the local part of A
4738 
4739     Collective on Mat
4740 
4741    Input Parameters:
4742 +    A,B - the matrices in mpiaij format
4743 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4744 .    startsj - starting points of the outgoing messages in B's j- and a-arrays, saved for MAT_REUSE (or PETSC_NULL)
4745 .    startsj_r - similar to startsj, but for the incoming messages
4746 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4747 
4748    Output Parameter:
4749 .    B_oth - the sequential matrix generated
4750 
4751     Level: developer
4752 
4753 @*/
4754 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4755 {
4756   VecScatter_MPI_General *gen_to,*gen_from;
4757   PetscErrorCode         ierr;
4758   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4759   Mat_SeqAIJ             *b_oth;
4760   VecScatter             ctx=a->Mvctx;
4761   MPI_Comm               comm=((PetscObject)ctx)->comm;
4762   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4763   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4764   PetscScalar            *rvalues,*svalues;
4765   MatScalar              *b_otha,*bufa,*bufA;
4766   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4767   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4768   MPI_Status             *sstatus,rstatus;
4769   PetscMPIInt            jj;
4770   PetscInt               *cols,sbs,rbs;
4771   PetscScalar            *vals;
4772 
4773   PetscFunctionBegin;
4774   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4775     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4776   }
4777   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4778   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4779 
4780   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4781   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4782   rvalues  = gen_from->values; /* holds the length of receiving row */
4783   svalues  = gen_to->values;   /* holds the length of sending row */
4784   nrecvs   = gen_from->n;
4785   nsends   = gen_to->n;
4786 
4787   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4788   srow     = gen_to->indices;   /* local row index to be sent */
4789   sstarts  = gen_to->starts;
4790   sprocs   = gen_to->procs;
4791   sstatus  = gen_to->sstatus;
4792   sbs      = gen_to->bs;
4793   rstarts  = gen_from->starts;
4794   rprocs   = gen_from->procs;
4795   rbs      = gen_from->bs;
4796 
4797   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4798   if (scall == MAT_INITIAL_MATRIX){
4799     /* i-array */
4800     /*---------*/
4801     /*  post receives */
4802     for (i=0; i<nrecvs; i++){
4803       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4804       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4805       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4806     }
4807 
4808     /* pack the outgoing message */
4809     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4810     sstartsj[0] = 0;  rstartsj[0] = 0;
4811     len = 0; /* total length of j or a array to be sent */
4812     k = 0;
4813     for (i=0; i<nsends; i++){
4814       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4815       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4816       for (j=0; j<nrows; j++) {
4817         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4818         for (l=0; l<sbs; l++){
4819           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4820           rowlen[j*sbs+l] = ncols;
4821           len += ncols;
4822           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4823         }
4824         k++;
4825       }
4826       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4827       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4828     }
4829     /* recvs and sends of i-array are completed */
4830     i = nrecvs;
4831     while (i--) {
4832       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4833     }
4834     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4835 
4836     /* allocate buffers for sending j and a arrays */
4837     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4838     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4839 
4840     /* create i-array of B_oth */
4841     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4842     b_othi[0] = 0;
4843     len = 0; /* total length of j or a array to be received */
4844     k = 0;
4845     for (i=0; i<nrecvs; i++){
4846       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4847       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4848       for (j=0; j<nrows; j++) {
4849         b_othi[k+1] = b_othi[k] + rowlen[j];
4850         len += rowlen[j]; k++;
4851       }
4852       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4853     }
4854 
4855     /* allocate space for j and a arrays of B_oth */
4856     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4857     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4858 
4859     /* j-array */
4860     /*---------*/
4861     /*  post receives of j-array */
4862     for (i=0; i<nrecvs; i++){
4863       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4864       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4865     }
4866 
4867     /* pack the outgoing message j-array */
4868     k = 0;
4869     for (i=0; i<nsends; i++){
4870       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4871       bufJ = bufj+sstartsj[i];
4872       for (j=0; j<nrows; j++) {
4873         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4874         for (ll=0; ll<sbs; ll++){
4875           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4876           for (l=0; l<ncols; l++){
4877             *bufJ++ = cols[l];
4878           }
4879           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4880         }
4881       }
4882       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4883     }
4884 
4885     /* recvs and sends of j-array are completed */
4886     i = nrecvs;
4887     while (i--) {
4888       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4889     }
4890     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4891   } else if (scall == MAT_REUSE_MATRIX){
4892     sstartsj = *startsj;
4893     rstartsj = *startsj_r;
4894     bufa     = *bufa_ptr;
4895     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4896     b_otha   = b_oth->a;
4897   } else {
4898     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4899   }
4900 
4901   /* a-array */
4902   /*---------*/
4903   /*  post receives of a-array */
4904   for (i=0; i<nrecvs; i++){
4905     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4906     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4907   }
4908 
4909   /* pack the outgoing message a-array */
4910   k = 0;
4911   for (i=0; i<nsends; i++){
4912     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4913     bufA = bufa+sstartsj[i];
4914     for (j=0; j<nrows; j++) {
4915       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4916       for (ll=0; ll<sbs; ll++){
4917         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4918         for (l=0; l<ncols; l++){
4919           *bufA++ = vals[l];
4920         }
4921         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4922       }
4923     }
4924     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4925   }
4926   /* recvs and sends of a-array are completed */
4927   i = nrecvs;
4928   while (i--) {
4929     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4930   }
4931   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4932   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4933 
4934   if (scall == MAT_INITIAL_MATRIX){
4935     /* put together the new matrix */
4936     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4937 
4938     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4939     /* Since these are PETSc arrays, change flags to free them as necessary. */
4940     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4941     b_oth->free_a  = PETSC_TRUE;
4942     b_oth->free_ij = PETSC_TRUE;
4943     b_oth->nonew   = 0;
4944 
4945     ierr = PetscFree(bufj);CHKERRQ(ierr);
4946     if (!startsj || !bufa_ptr){
4947       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4948       ierr = PetscFree(bufa);CHKERRQ(ierr);
4949     } else {
4950       *startsj   = sstartsj;
4951       *startsj_r = rstartsj;
4952       *bufa_ptr  = bufa;
4953     }
4954   }
4955   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4956   PetscFunctionReturn(0);
4957 }
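
/*
   Example usage (a sketch; the startsj, startsj_r and bufa arrays returned by
   the MAT_INITIAL_MATRIX call carry the communication pattern and must be kept
   for MAT_REUSE_MATRIX; pass PETSC_NULL instead and the routine frees them itself):

     PetscInt  *startsj,*startsj_r;
     MatScalar *bufa;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ...                                  (values of B change, same nonzero pattern)
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/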
4958 
4959 #undef __FUNCT__
4960 #define __FUNCT__ "MatGetCommunicationStructs"
4961 /*@C
4962   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4963 
4964   Not Collective
4965 
4966   Input Parameters:
4967 . A - The matrix in mpiaij format
4968 
4969   Output Parameter:
4970 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4971 . colmap - A map from global column index to local index into lvec
4972 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4973 
4974   Level: developer
4975 
4976 @*/
4977 #if defined (PETSC_USE_CTABLE)
4978 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4979 #else
4980 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4981 #endif
4982 {
4983   Mat_MPIAIJ *a;
4984 
4985   PetscFunctionBegin;
4986   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4987   PetscValidPointer(lvec, 2);
4988   PetscValidPointer(colmap, 3);
4989   PetscValidPointer(multScatter, 4);
4990   a = (Mat_MPIAIJ *) A->data;
4991   if (lvec) *lvec = a->lvec;
4992   if (colmap) *colmap = a->colmap;
4993   if (multScatter) *multScatter = a->Mvctx;
4994   PetscFunctionReturn(0);
4995 }
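
/*
   Example usage (a sketch; all three output pointers must be non-null since they
   are checked with PetscValidPointer(), and with PETSC_USE_CTABLE the colmap
   argument is a PetscTable* instead of a PetscInt**):

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter scatter;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);

   The returned objects reference the matrix's internal data structures and must
   not be destroyed by the caller.
*/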
4996 
4997 EXTERN_C_BEGIN
4998 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
4999 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5000 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5001 EXTERN_C_END
5002 
5003 #undef __FUNCT__
5004 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5005 /*
5006     Computes C = (B'*A')' = A*B, since computing A*B directly is untenable
5007 
5008                n                       p                          p
5009         (              )       (              )         (                  )
5010       m (      A       )  *  n (       B      )   =   m (         C        )
5011         (              )       (              )         (                  )
5012 
5013 */
5014 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5015 {
5016   PetscErrorCode     ierr;
5017   Mat                At,Bt,Ct;
5018 
5019   PetscFunctionBegin;
5020   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5021   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5022   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5023   ierr = MatDestroy(At);CHKERRQ(ierr);
5024   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5025   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5026   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5027   PetscFunctionReturn(0);
5028 }
5029 
5030 #undef __FUNCT__
5031 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5032 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5033 {
5034   PetscErrorCode ierr;
5035   PetscInt       m=A->rmap->n,n=B->cmap->n;
5036   Mat            Cmat;
5037 
5038   PetscFunctionBegin;
5039   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5040   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5041   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5042   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5043   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5044   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5045   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5046   *C   = Cmat;
5047   PetscFunctionReturn(0);
5048 }
5049 
5050 /* ----------------------------------------------------------------*/
5051 #undef __FUNCT__
5052 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5053 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5054 {
5055   PetscErrorCode ierr;
5056 
5057   PetscFunctionBegin;
5058   if (scall == MAT_INITIAL_MATRIX){
5059     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5060   }
5061   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5062   PetscFunctionReturn(0);
5063 }
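
/*
   Example usage (a sketch): these routines are composed on the matrix in
   MatCreate_MPIAIJ() below, so they are normally reached through the generic
   interface with an MPIDense A and an MPIAIJ B:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/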
5064 
5065 EXTERN_C_BEGIN
5066 #if defined(PETSC_HAVE_MUMPS)
5067 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5068 #endif
5069 #if defined(PETSC_HAVE_PASTIX)
5070 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5071 #endif
5072 #if defined(PETSC_HAVE_SUPERLU_DIST)
5073 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5074 #endif
5075 #if defined(PETSC_HAVE_SPOOLES)
5076 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5077 #endif
5078 EXTERN_C_END
5079 
5080 /*MC
5081    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5082 
5083    Options Database Keys:
5084 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5085 
5086   Level: beginner
5087 
5088 .seealso: MatCreateMPIAIJ()
5089 M*/
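
/*
   Example usage (a sketch; m and n are local sizes chosen by the caller and the
   preallocation figures are purely illustrative): create the type
   programmatically and let -mat_type on the command line override it:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/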
5090 
5091 EXTERN_C_BEGIN
5092 #undef __FUNCT__
5093 #define __FUNCT__ "MatCreate_MPIAIJ"
5094 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5095 {
5096   Mat_MPIAIJ     *b;
5097   PetscErrorCode ierr;
5098   PetscMPIInt    size;
5099 
5100   PetscFunctionBegin;
5101   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5102 
5103   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5104   B->data         = (void*)b;
5105   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5106   B->rmap->bs     = 1;
5107   B->assembled    = PETSC_FALSE;
5108   B->mapping      = 0;
5109 
5110   B->insertmode   = NOT_SET_VALUES;
5111   b->size         = size;
5112   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5113 
5114   /* build cache for off array entries formed */
5115   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5116   b->donotstash  = PETSC_FALSE;
5117   b->colmap      = 0;
5118   b->garray      = 0;
5119   b->roworiented = PETSC_TRUE;
5120 
5121   /* stuff used for matrix vector multiply */
5122   b->lvec      = PETSC_NULL;
5123   b->Mvctx     = PETSC_NULL;
5124 
5125   /* stuff for MatGetRow() */
5126   b->rowindices   = 0;
5127   b->rowvalues    = 0;
5128   b->getrowactive = PETSC_FALSE;
5129 
5130 #if defined(PETSC_HAVE_SPOOLES)
5131   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5132                                      "MatGetFactor_mpiaij_spooles",
5133                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5134 #endif
5135 #if defined(PETSC_HAVE_MUMPS)
5136   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5137                                      "MatGetFactor_aij_mumps",
5138                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5139 #endif
5140 #if defined(PETSC_HAVE_PASTIX)
5141   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5142 					   "MatGetFactor_mpiaij_pastix",
5143 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5144 #endif
5145 #if defined(PETSC_HAVE_SUPERLU_DIST)
5146   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5147                                      "MatGetFactor_mpiaij_superlu_dist",
5148                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5149 #endif
5150   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5151                                      "MatStoreValues_MPIAIJ",
5152                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5153   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5154                                      "MatRetrieveValues_MPIAIJ",
5155                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5156   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5157 				     "MatGetDiagonalBlock_MPIAIJ",
5158                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5159   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5160 				     "MatIsTranspose_MPIAIJ",
5161 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5162   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5163 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5164 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5165   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5166 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5167 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5168   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5169 				     "MatDiagonalScaleLocal_MPIAIJ",
5170 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5171   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5172                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5173                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5174   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5175                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5176                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5177   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5178                                      "MatConvert_MPIAIJ_MPISBAIJ",
5179                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5180   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5181                                      "MatMatMult_MPIDense_MPIAIJ",
5182                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5183   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5184                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5185                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5186   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5187                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5188                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5189   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5190   PetscFunctionReturn(0);
5191 }
5192 EXTERN_C_END
5193 
5194 #undef __FUNCT__
5195 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5196 /*@
5197      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5198          and "off-diagonal" part of the matrix in CSR format.
5199 
5200    Collective on MPI_Comm
5201 
5202    Input Parameters:
5203 +  comm - MPI communicator
5204 .  m - number of local rows (Cannot be PETSC_DECIDE)
5205 .  n - This value should be the same as the local size used in creating the
5206        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5207        calculated if N is given). For square matrices n is almost always m.
5208 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5209 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5210 .   i - row indices for "diagonal" portion of matrix
5211 .   j - column indices
5212 .   a - matrix values
5213 .   oi - row indices for "off-diagonal" portion of matrix
5214 .   oj - column indices
5215 -   oa - matrix values
5216 
5217    Output Parameter:
5218 .   mat - the matrix
5219 
5220    Level: advanced
5221 
5222    Notes:
5223        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5224 
5225        The i and j indices are 0 based
5226 
5227        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5228 
5229        This sets local rows and cannot be used to set off-processor values.
5230 
5231        You cannot later use MatSetValues() to change values in this matrix.
5232 
5233 .keywords: matrix, aij, compressed row, sparse, parallel
5234 
5235 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5236           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5237 @*/
5238 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5239 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5240 {
5241   PetscErrorCode ierr;
5242   Mat_MPIAIJ     *maij;
5243 
5244   PetscFunctionBegin;
5245   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5246   if (i[0]) {
5247     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5248   }
5249   if (oi[0]) {
5250     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5251   }
5252   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5253   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5254   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5255   maij = (Mat_MPIAIJ*) (*mat)->data;
5256   maij->donotstash     = PETSC_TRUE;
5257   (*mat)->preallocated = PETSC_TRUE;
5258 
5259   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5260   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5261   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5262   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5263 
5264   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5265   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5266 
5267   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5268   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5269   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5270   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5271 
5272   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5273   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5274   PetscFunctionReturn(0);
5275 }
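
/*
   Example usage (a minimal sketch for exactly two processes; the arrays must
   remain valid for the life of the matrix since they are not copied, and, per
   the MatCreateSeqAIJWithArrays() call above, oj holds GLOBAL column indices):

     PetscMPIInt rank;
     PetscInt    i[2]  = {0,1},j[1]  = {0};    diagonal block: one entry in local column 0
     PetscScalar a[1]  = {1.0};
     PetscInt    oi[2] = {0,1},oj[1];          off-diagonal block: one entry
     PetscScalar oa[1] = {2.0};
     Mat         A;
     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     oj[0] = rank ? 0 : 1;                     the other process's column
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   On two processes this assembles the 2x2 matrix [1 2; 2 1].
*/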
5276 
5277 /*
5278     Special version for direct calls from Fortran
5279 */
5280 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5281 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5282 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5283 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5284 #endif
5285 
5286 /* Change these macros so can be used in void function */
5287 #undef CHKERRQ
5288 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5289 #undef SETERRQ2
5290 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5291 #undef SETERRQ
5292 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5293 
5294 EXTERN_C_BEGIN
5295 #undef __FUNCT__
5296 #define __FUNCT__ "matsetvaluesmpiaij_"
5297 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5298 {
5299   Mat             mat = *mmat;
5300   PetscInt        m = *mm, n = *mn;
5301   InsertMode      addv = *maddv;
5302   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5303   PetscScalar     value;
5304   PetscErrorCode  ierr;
5305 
5306   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5307   if (mat->insertmode == NOT_SET_VALUES) {
5308     mat->insertmode = addv;
5309   }
5310 #if defined(PETSC_USE_DEBUG)
5311   else if (mat->insertmode != addv) {
5312     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5313   }
5314 #endif
5315   {
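  /* open a new scope so that declarations may follow the executable statements above (C89) */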
5316   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5317   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5318   PetscBool       roworiented = aij->roworiented;
5319 
5320   /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5321   Mat             A = aij->A;
5322   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5323   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5324   MatScalar       *aa = a->a;
5325   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5326   Mat             B = aij->B;
5327   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5328   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5329   MatScalar       *ba = b->a;
5330 
5331   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5332   PetscInt        nonew = a->nonew;
5333   MatScalar       *ap1,*ap2;
5334 
5335   PetscFunctionBegin;
5336   for (i=0; i<m; i++) {
5337     if (im[i] < 0) continue;
5338 #if defined(PETSC_USE_DEBUG)
5339     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5340 #endif
5341     if (im[i] >= rstart && im[i] < rend) {
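      /* the row is owned by this process: cache pointers, capacities, and
         current lengths for this row in both the diagonal (1 = A) and
         off-diagonal (2 = B) blocks, as the insertion macros below expect */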
5342       row      = im[i] - rstart;
5343       lastcol1 = -1;
5344       rp1      = aj + ai[row];
5345       ap1      = aa + ai[row];
5346       rmax1    = aimax[row];
5347       nrow1    = ailen[row];
5348       low1     = 0;
5349       high1    = nrow1;
5350       lastcol2 = -1;
5351       rp2      = bj + bi[row];
5352       ap2      = ba + bi[row];
5353       rmax2    = bimax[row];
5354       nrow2    = bilen[row];
5355       low2     = 0;
5356       high2    = nrow2;
5357 
5358       for (j=0; j<n; j++) {
5359         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5360         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5361         if (in[j] >= cstart && in[j] < cend){
5362           col = in[j] - cstart;
5363           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5364         } else if (in[j] < 0) continue;
5365 #if defined(PETSC_USE_DEBUG)
5366         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5367 #endif
5368         else {
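          /* the column lies outside the diagonal block, so the value goes into B;
             after assembly B stores compacted column indices, so translate the
             global index through colmap */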
5369           if (mat->was_assembled) {
5370             if (!aij->colmap) {
5371               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5372             }
5373 #if defined (PETSC_USE_CTABLE)
5374             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5375             col--;
5376 #else
5377             col = aij->colmap[in[j]] - 1;
5378 #endif
5379             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
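              /* a column not yet present in B while new nonzeros are allowed:
                 disassemble the matrix so B reverts to global column indexing,
                 then reload the cached B pointers */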
5380               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5381               col =  in[j];
5382               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5383               B = aij->B;
5384               b = (Mat_SeqAIJ*)B->data;
5385               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5386               rp2      = bj + bi[row];
5387               ap2      = ba + bi[row];
5388               rmax2    = bimax[row];
5389               nrow2    = bilen[row];
5390               low2     = 0;
5391               high2    = nrow2;
5392               bm       = aij->B->rmap->n;
5393               ba = b->a;
5394             }
5395           } else col = in[j];
5396           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5397         }
5398       }
5399     } else {
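      /* the row belongs to another process: stash the values locally and let
         MatAssemblyBegin/End() communicate them to the owning process */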
5400       if (!aij->donotstash) {
5401         if (roworiented) {
5402           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5403         } else {
5404           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5405         }
5406       }
5407     }
5408   }}
5409   PetscFunctionReturnVoid();
5410 }
5411 EXTERN_C_END
5412 
5413