xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4cb17eb5d24fda53f1a75322daaa24f590374b40)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine number diagonal and off-diagonal counts */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine number diagonal and off-diagonal counts */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0*/
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it it is not scalable (each processor
181 has an order N integer array but is fast to acess.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
284 
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscTruth     roworiented = aij->roworiented;
311 
312   /* Some Variables required in the macro */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscTruth     ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash || mat->nooffprocentries) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are either in INSERTMODE or ADDMODE */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   mat->insertmode = addv; /* in case this processor had no cache */
470 
471   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
472   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
473   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
479 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
483   PetscErrorCode ierr;
484   PetscMPIInt    n;
485   PetscInt       i,j,rstart,ncols,flg;
486   PetscInt       *row,*col;
487   PetscTruth     other_disassembled;
488   PetscScalar    *val;
489   InsertMode     addv = mat->insertmode;
490 
491   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
492   PetscFunctionBegin;
493   if (!aij->donotstash && !mat->nooffprocentries) {
494     while (1) {
495       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
496       if (!flg) break;
497 
498       for (i=0; i<n;) {
499         /* Now identify the consecutive vals belonging to the same row */
500         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
501         if (j < n) ncols = j-i;
502         else       ncols = n-i;
503         /* Now assemble all these values with a single function call */
504         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
505         i = j;
506       }
507     }
508     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
509   }
510   a->compressedrow.use     = PETSC_FALSE;
511   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
512   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
513 
514   /* determine if any processor has disassembled, if so we must
515      also disassemble ourselfs, in order that we may reassemble. */
516   /*
517      if nonzero structure of submatrix B cannot change then we know that
518      no processor disassembled thus we can skip this stuff
519   */
520   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
521     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
522     if (mat->was_assembled && !other_disassembled) {
523       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
524     }
525   }
526   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
527     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
528   }
529   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
530   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
531   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
532   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
533 
534   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
535   aij->rowvalues = 0;
536 
537   /* used by MatAXPY() */
538   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
539   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
540 
541   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
542   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of number of messages and max length*/
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
642   count  = nrecvs; slen = 0;
643   while (count) {
644     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
645     /* unpack receives into our local space */
646     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
647     source[imdex]  = recv_status.MPI_SOURCE;
648     lens[imdex]    = n;
649     slen          += n;
650     count--;
651   }
652   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
653 
654   /* move the data into the send scatter */
655   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
656   count = 0;
657   for (i=0; i<nrecvs; i++) {
658     values = rvalues + i*nmax;
659     for (j=0; j<lens[i]; j++) {
660       lrows[count++] = values[j] - base;
661     }
662   }
663   ierr = PetscFree(rvalues);CHKERRQ(ierr);
664   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
665   ierr = PetscFree(owner);CHKERRQ(ierr);
666   ierr = PetscFree(nprocs);CHKERRQ(ierr);
667 
668   /* actually zap the local rows */
669   /*
670         Zero the required rows. If the "diagonal block" of the matrix
671      is square and the user wishes to set the diagonal we use separate
672      code so that MatSetValues() is not called for each diagonal allocating
673      new memory, thus calling lots of mallocs and slowing things down.
674 
675   */
676   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
677   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
678   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
679     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
680   } else if (diag != 0.0) {
681     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
682     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
683       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
684 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
685     }
686     for (i = 0; i < slen; i++) {
687       row  = lrows[i] + rstart;
688       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
689     }
690     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
691     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
692   } else {
693     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
694   }
695   ierr = PetscFree(lrows);CHKERRQ(ierr);
696 
697   /* wait on sends */
698   if (nsends) {
699     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
700     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
701     ierr = PetscFree(send_status);CHKERRQ(ierr);
702   }
703   ierr = PetscFree(send_waits);CHKERRQ(ierr);
704   ierr = PetscFree(svalues);CHKERRQ(ierr);
705 
706   PetscFunctionReturn(0);
707 }
708 
709 #undef __FUNCT__
710 #define __FUNCT__ "MatMult_MPIAIJ"
711 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
712 {
713   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
714   PetscErrorCode ierr;
715   PetscInt       nt;
716 
717   PetscFunctionBegin;
718   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
719   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
720   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
721   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
722   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
723   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
729 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
730 {
731   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
732   PetscErrorCode ierr;
733 
734   PetscFunctionBegin;
735   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatMultAdd_MPIAIJ"
741 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
742 {
743   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
744   PetscErrorCode ierr;
745 
746   PetscFunctionBegin;
747   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
748   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
749   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
750   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
751   PetscFunctionReturn(0);
752 }
753 
754 #undef __FUNCT__
755 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
756 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
757 {
758   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760   PetscTruth     merged;
761 
762   PetscFunctionBegin;
763   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
764   /* do nondiagonal part */
765   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
766   if (!merged) {
767     /* send it on its way */
768     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
769     /* do local part */
770     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
771     /* receive remote parts: note this assumes the values are not actually */
772     /* added in yy until the next line, */
773     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
774   } else {
775     /* do local part */
776     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
777     /* send it on its way */
778     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
779     /* values actually were received in the Begin() but we need to call this nop */
780     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
781   }
782   PetscFunctionReturn(0);
783 }
784 
785 EXTERN_C_BEGIN
786 #undef __FUNCT__
787 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
788 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
789 {
790   MPI_Comm       comm;
791   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
792   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
793   IS             Me,Notme;
794   PetscErrorCode ierr;
795   PetscInt       M,N,first,last,*notme,i;
796   PetscMPIInt    size;
797 
798   PetscFunctionBegin;
799 
800   /* Easy test: symmetric diagonal block */
801   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
802   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
803   if (!*f) PetscFunctionReturn(0);
804   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
805   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
806   if (size == 1) PetscFunctionReturn(0);
807 
808   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
809   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
810   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
811   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
812   for (i=0; i<first; i++) notme[i] = i;
813   for (i=last; i<M; i++) notme[i-last+first] = i;
814   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
815   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
816   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
817   Aoff = Aoffs[0];
818   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
819   Boff = Boffs[0];
820   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
821   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
822   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
823   ierr = ISDestroy(Me);CHKERRQ(ierr);
824   ierr = ISDestroy(Notme);CHKERRQ(ierr);
825 
826   PetscFunctionReturn(0);
827 }
828 EXTERN_C_END
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
832 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
833 {
834   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
835   PetscErrorCode ierr;
836 
837   PetscFunctionBegin;
838   /* do nondiagonal part */
839   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
840   /* send it on its way */
841   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
842   /* do local part */
843   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
844   /* receive remote parts */
845   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
846   PetscFunctionReturn(0);
847 }
848 
849 /*
850   This only works correctly for square matrices where the subblock A->A is the
851    diagonal block
852 */
853 #undef __FUNCT__
854 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
855 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
856 {
857   PetscErrorCode ierr;
858   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
859 
860   PetscFunctionBegin;
861   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
862   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
863   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatScale_MPIAIJ"
869 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
870 {
871   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
872   PetscErrorCode ierr;
873 
874   PetscFunctionBegin;
875   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
876   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
877   PetscFunctionReturn(0);
878 }
879 
880 #undef __FUNCT__
881 #define __FUNCT__ "MatDestroy_MPIAIJ"
882 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
883 {
884   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
885   PetscErrorCode ierr;
886 
887   PetscFunctionBegin;
888 #if defined(PETSC_USE_LOG)
889   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
890 #endif
891   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
892   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
893   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
894   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
895 #if defined (PETSC_USE_CTABLE)
896   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
897 #else
898   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
899 #endif
900   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
901   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
902   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
903   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
904   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
905   ierr = PetscFree(aij);CHKERRQ(ierr);
906 
907   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
908   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
909   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
910   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
911   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
912   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
913   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
916   PetscFunctionReturn(0);
917 }
918 
919 #undef __FUNCT__
920 #define __FUNCT__ "MatView_MPIAIJ_Binary"
921 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
922 {
923   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
924   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
925   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
926   PetscErrorCode    ierr;
927   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
928   int               fd;
929   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
930   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
931   PetscScalar       *column_values;
932 
933   PetscFunctionBegin;
934   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
935   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
936   nz   = A->nz + B->nz;
937   if (!rank) {
938     header[0] = MAT_FILE_CLASSID;
939     header[1] = mat->rmap->N;
940     header[2] = mat->cmap->N;
941     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
942     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
943     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
944     /* get largest number of rows any processor has */
945     rlen = mat->rmap->n;
946     range = mat->rmap->range;
947     for (i=1; i<size; i++) {
948       rlen = PetscMax(rlen,range[i+1] - range[i]);
949     }
950   } else {
951     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
952     rlen = mat->rmap->n;
953   }
954 
955   /* load up the local row counts */
956   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
957   for (i=0; i<mat->rmap->n; i++) {
958     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
959   }
960 
961   /* store the row lengths to the file */
962   if (!rank) {
963     MPI_Status status;
964     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
965     for (i=1; i<size; i++) {
966       rlen = range[i+1] - range[i];
967       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
968       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
969     }
970   } else {
971     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
972   }
973   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
974 
975   /* load up the local column indices */
976   nzmax = nz; /* )th processor needs space a largest processor needs */
977   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
978   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
979   cnt  = 0;
980   for (i=0; i<mat->rmap->n; i++) {
981     for (j=B->i[i]; j<B->i[i+1]; j++) {
982       if ( (col = garray[B->j[j]]) > cstart) break;
983       column_indices[cnt++] = col;
984     }
985     for (k=A->i[i]; k<A->i[i+1]; k++) {
986       column_indices[cnt++] = A->j[k] + cstart;
987     }
988     for (; j<B->i[i+1]; j++) {
989       column_indices[cnt++] = garray[B->j[j]];
990     }
991   }
992   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
993 
994   /* store the column indices to the file */
995   if (!rank) {
996     MPI_Status status;
997     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
998     for (i=1; i<size; i++) {
999       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1000       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1001       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1002       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1003     }
1004   } else {
1005     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1006     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1007   }
1008   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1009 
1010   /* load up the local column values */
1011   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1012   cnt  = 0;
1013   for (i=0; i<mat->rmap->n; i++) {
1014     for (j=B->i[i]; j<B->i[i+1]; j++) {
1015       if ( garray[B->j[j]] > cstart) break;
1016       column_values[cnt++] = B->a[j];
1017     }
1018     for (k=A->i[i]; k<A->i[i+1]; k++) {
1019       column_values[cnt++] = A->a[k];
1020     }
1021     for (; j<B->i[i+1]; j++) {
1022       column_values[cnt++] = B->a[j];
1023     }
1024   }
1025   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1026 
1027   /* store the column values to the file */
1028   if (!rank) {
1029     MPI_Status status;
1030     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1031     for (i=1; i<size; i++) {
1032       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1033       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1034       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1035       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1036     }
1037   } else {
1038     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1039     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1040   }
1041   ierr = PetscFree(column_values);CHKERRQ(ierr);
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 #undef __FUNCT__
1046 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1047 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1048 {
1049   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1050   PetscErrorCode    ierr;
1051   PetscMPIInt       rank = aij->rank,size = aij->size;
1052   PetscTruth        isdraw,iascii,isbinary;
1053   PetscViewer       sviewer;
1054   PetscViewerFormat format;
1055 
1056   PetscFunctionBegin;
1057   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1058   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1059   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1060   if (iascii) {
1061     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1062     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1063       MatInfo    info;
1064       PetscTruth inodes;
1065 
1066       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1067       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1068       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1069       if (!inodes) {
1070         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1071 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1072       } else {
1073         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1074 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1075       }
1076       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1077       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1078       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1079       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1080       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1081       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1082       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1083       PetscFunctionReturn(0);
1084     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1085       PetscInt   inodecount,inodelimit,*inodes;
1086       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1087       if (inodes) {
1088         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1089       } else {
1090         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1091       }
1092       PetscFunctionReturn(0);
1093     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1094       PetscFunctionReturn(0);
1095     }
1096   } else if (isbinary) {
1097     if (size == 1) {
1098       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1099       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1100     } else {
1101       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1102     }
1103     PetscFunctionReturn(0);
1104   } else if (isdraw) {
1105     PetscDraw  draw;
1106     PetscTruth isnull;
1107     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1108     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1109   }
1110 
1111   if (size == 1) {
1112     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1113     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1114   } else {
1115     /* assemble the entire matrix onto first processor. */
1116     Mat         A;
1117     Mat_SeqAIJ  *Aloc;
1118     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1119     MatScalar   *a;
1120 
1121     if (mat->rmap->N > 1024) {
1122       PetscTruth flg = PETSC_FALSE;
1123 
1124       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1125       if (!flg) {
1126         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1127       }
1128     }
1129 
1130     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1131     if (!rank) {
1132       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1133     } else {
1134       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1135     }
1136     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1137     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1138     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1139     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1140 
1141     /* copy over the A part */
1142     Aloc = (Mat_SeqAIJ*)aij->A->data;
1143     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1144     row = mat->rmap->rstart;
1145     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1146     for (i=0; i<m; i++) {
1147       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1148       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1149     }
1150     aj = Aloc->j;
1151     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1152 
1153     /* copy over the B part */
1154     Aloc = (Mat_SeqAIJ*)aij->B->data;
1155     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1156     row  = mat->rmap->rstart;
1157     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1158     ct   = cols;
1159     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1160     for (i=0; i<m; i++) {
1161       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1162       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1163     }
1164     ierr = PetscFree(ct);CHKERRQ(ierr);
1165     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1166     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1167     /*
1168        Everyone has to call to draw the matrix since the graphics waits are
1169        synchronized across all processors that share the PetscDraw object
1170     */
1171     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1172     if (!rank) {
1173       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1174       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1175     }
1176     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1177     ierr = MatDestroy(A);CHKERRQ(ierr);
1178   }
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatView_MPIAIJ"
1184 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1185 {
1186   PetscErrorCode ierr;
1187   PetscTruth     iascii,isdraw,issocket,isbinary;
1188 
1189   PetscFunctionBegin;
1190   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1191   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1192   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1193   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1194   if (iascii || isdraw || isbinary || issocket) {
1195     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1196   } else {
1197     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1198   }
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatSOR_MPIAIJ"
1204 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1205 {
1206   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1207   PetscErrorCode ierr;
1208   Vec            bb1 = 0;
1209   PetscTruth     hasop;
1210 
1211   PetscFunctionBegin;
1212   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1213     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1214   }
1215 
1216   if (flag == SOR_APPLY_UPPER) {
1217     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1218     PetscFunctionReturn(0);
1219   }
1220 
1221   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1222     if (flag & SOR_ZERO_INITIAL_GUESS) {
1223       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1224       its--;
1225     }
1226 
1227     while (its--) {
1228       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230 
1231       /* update rhs: bb1 = bb - B*x */
1232       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1233       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1234 
1235       /* local sweep */
1236       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1237     }
1238   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1239     if (flag & SOR_ZERO_INITIAL_GUESS) {
1240       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1241       its--;
1242     }
1243     while (its--) {
1244       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246 
1247       /* update rhs: bb1 = bb - B*x */
1248       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1249       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1250 
1251       /* local sweep */
1252       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1253     }
1254   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1255     if (flag & SOR_ZERO_INITIAL_GUESS) {
1256       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1257       its--;
1258     }
1259     while (its--) {
1260       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1262 
1263       /* update rhs: bb1 = bb - B*x */
1264       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1265       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1266 
1267       /* local sweep */
1268       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1269     }
1270   }  else if (flag & SOR_EISENSTAT) {
1271     Vec         xx1;
1272 
1273     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1274     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1275 
1276     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1277     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1278     if (!mat->diag) {
1279       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1280       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1281     }
1282     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1283     if (hasop) {
1284       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1285     } else {
1286       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1287     }
1288     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1289 
1290     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1291 
1292     /* local sweep */
1293     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1294     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1295     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1296   } else {
1297     SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1298   }
1299 
1300   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatPermute_MPIAIJ"
1306 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1307 {
1308   MPI_Comm       comm,pcomm;
1309   PetscInt       first,local_size,nrows;
1310   const PetscInt *rows;
1311   PetscMPIInt    size;
1312   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1317   /* make a collective version of 'rowp' */
1318   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1319   if (pcomm==comm) {
1320     crowp = rowp;
1321   } else {
1322     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1323     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1324     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1325     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1326   }
1327   /* collect the global row permutation and invert it */
1328   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1329   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1330   if (pcomm!=comm) {
1331     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1332   }
1333   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1334   /* get the local target indices */
1335   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1336   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1337   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1338   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1339   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1340   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1341   /* the column permutation is so much easier;
1342      make a local version of 'colp' and invert it */
1343   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1344   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1345   if (size==1) {
1346     lcolp = colp;
1347   } else {
1348     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1349     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1350     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1351   }
1352   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1353   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1354   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1355   if (size>1) {
1356     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1357     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1358   }
1359   /* now we just get the submatrix */
1360   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1361   /* clean up */
1362   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1363   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1369 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1370 {
1371   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1372   Mat            A = mat->A,B = mat->B;
1373   PetscErrorCode ierr;
1374   PetscReal      isend[5],irecv[5];
1375 
1376   PetscFunctionBegin;
1377   info->block_size     = 1.0;
1378   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1379   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1380   isend[3] = info->memory;  isend[4] = info->mallocs;
1381   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1382   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1383   isend[3] += info->memory;  isend[4] += info->mallocs;
1384   if (flag == MAT_LOCAL) {
1385     info->nz_used      = isend[0];
1386     info->nz_allocated = isend[1];
1387     info->nz_unneeded  = isend[2];
1388     info->memory       = isend[3];
1389     info->mallocs      = isend[4];
1390   } else if (flag == MAT_GLOBAL_MAX) {
1391     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1392     info->nz_used      = irecv[0];
1393     info->nz_allocated = irecv[1];
1394     info->nz_unneeded  = irecv[2];
1395     info->memory       = irecv[3];
1396     info->mallocs      = irecv[4];
1397   } else if (flag == MAT_GLOBAL_SUM) {
1398     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1399     info->nz_used      = irecv[0];
1400     info->nz_allocated = irecv[1];
1401     info->nz_unneeded  = irecv[2];
1402     info->memory       = irecv[3];
1403     info->mallocs      = irecv[4];
1404   }
1405   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1406   info->fill_ratio_needed = 0;
1407   info->factor_mallocs    = 0;
1408 
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatSetOption_MPIAIJ"
1414 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1415 {
1416   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   switch (op) {
1421   case MAT_NEW_NONZERO_LOCATIONS:
1422   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1423   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1424   case MAT_KEEP_NONZERO_PATTERN:
1425   case MAT_NEW_NONZERO_LOCATION_ERR:
1426   case MAT_USE_INODES:
1427   case MAT_IGNORE_ZERO_ENTRIES:
1428     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1429     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1430     break;
1431   case MAT_ROW_ORIENTED:
1432     a->roworiented = flg;
1433     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1434     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1435     break;
1436   case MAT_NEW_DIAGONALS:
1437     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1438     break;
1439   case MAT_IGNORE_OFF_PROC_ENTRIES:
1440     a->donotstash = PETSC_TRUE;
1441     break;
1442   case MAT_SPD:
1443     A->spd_set                         = PETSC_TRUE;
1444     A->spd                             = flg;
1445     if (flg) {
1446       A->symmetric                     = PETSC_TRUE;
1447       A->structurally_symmetric        = PETSC_TRUE;
1448       A->symmetric_set                 = PETSC_TRUE;
1449       A->structurally_symmetric_set    = PETSC_TRUE;
1450     }
1451     break;
1452   case MAT_SYMMETRIC:
1453     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1454     break;
1455   case MAT_STRUCTURALLY_SYMMETRIC:
1456     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1457     break;
1458   case MAT_HERMITIAN:
1459     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1460     break;
1461   case MAT_SYMMETRY_ETERNAL:
1462     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1463     break;
1464   default:
1465     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1466   }
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatGetRow_MPIAIJ"
1472 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1473 {
1474   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1475   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1476   PetscErrorCode ierr;
1477   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1478   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1479   PetscInt       *cmap,*idx_p;
1480 
1481   PetscFunctionBegin;
1482   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1483   mat->getrowactive = PETSC_TRUE;
1484 
1485   if (!mat->rowvalues && (idx || v)) {
1486     /*
1487         allocate enough space to hold information from the longest row.
1488     */
1489     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1490     PetscInt   max = 1,tmp;
1491     for (i=0; i<matin->rmap->n; i++) {
1492       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1493       if (max < tmp) { max = tmp; }
1494     }
1495     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1496   }
1497 
1498   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1499   lrow = row - rstart;
1500 
1501   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1502   if (!v)   {pvA = 0; pvB = 0;}
1503   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1504   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1505   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1506   nztot = nzA + nzB;
1507 
1508   cmap  = mat->garray;
1509   if (v  || idx) {
1510     if (nztot) {
1511       /* Sort by increasing column numbers, assuming A and B already sorted */
1512       PetscInt imark = -1;
1513       if (v) {
1514         *v = v_p = mat->rowvalues;
1515         for (i=0; i<nzB; i++) {
1516           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1517           else break;
1518         }
1519         imark = i;
1520         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1521         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1522       }
1523       if (idx) {
1524         *idx = idx_p = mat->rowindices;
1525         if (imark > -1) {
1526           for (i=0; i<imark; i++) {
1527             idx_p[i] = cmap[cworkB[i]];
1528           }
1529         } else {
1530           for (i=0; i<nzB; i++) {
1531             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1532             else break;
1533           }
1534           imark = i;
1535         }
1536         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1537         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1538       }
1539     } else {
1540       if (idx) *idx = 0;
1541       if (v)   *v   = 0;
1542     }
1543   }
1544   *nz = nztot;
1545   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1546   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1552 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1553 {
1554   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1555 
1556   PetscFunctionBegin;
1557   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1558   aij->getrowactive = PETSC_FALSE;
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatNorm_MPIAIJ"
1564 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1565 {
1566   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1567   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1568   PetscErrorCode ierr;
1569   PetscInt       i,j,cstart = mat->cmap->rstart;
1570   PetscReal      sum = 0.0;
1571   MatScalar      *v;
1572 
1573   PetscFunctionBegin;
1574   if (aij->size == 1) {
1575     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1576   } else {
1577     if (type == NORM_FROBENIUS) {
1578       v = amat->a;
1579       for (i=0; i<amat->nz; i++) {
1580 #if defined(PETSC_USE_COMPLEX)
1581         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1582 #else
1583         sum += (*v)*(*v); v++;
1584 #endif
1585       }
1586       v = bmat->a;
1587       for (i=0; i<bmat->nz; i++) {
1588 #if defined(PETSC_USE_COMPLEX)
1589         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1590 #else
1591         sum += (*v)*(*v); v++;
1592 #endif
1593       }
1594       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1595       *norm = sqrt(*norm);
1596     } else if (type == NORM_1) { /* max column norm */
1597       PetscReal *tmp,*tmp2;
1598       PetscInt  *jj,*garray = aij->garray;
1599       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1600       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1601       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1602       *norm = 0.0;
1603       v = amat->a; jj = amat->j;
1604       for (j=0; j<amat->nz; j++) {
1605         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1606       }
1607       v = bmat->a; jj = bmat->j;
1608       for (j=0; j<bmat->nz; j++) {
1609         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1610       }
1611       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1612       for (j=0; j<mat->cmap->N; j++) {
1613         if (tmp2[j] > *norm) *norm = tmp2[j];
1614       }
1615       ierr = PetscFree(tmp);CHKERRQ(ierr);
1616       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1617     } else if (type == NORM_INFINITY) { /* max row norm */
1618       PetscReal ntemp = 0.0;
1619       for (j=0; j<aij->A->rmap->n; j++) {
1620         v = amat->a + amat->i[j];
1621         sum = 0.0;
1622         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1623           sum += PetscAbsScalar(*v); v++;
1624         }
1625         v = bmat->a + bmat->i[j];
1626         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1627           sum += PetscAbsScalar(*v); v++;
1628         }
1629         if (sum > ntemp) ntemp = sum;
1630       }
1631       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1632     } else {
1633       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1634     }
1635   }
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 #undef __FUNCT__
1640 #define __FUNCT__ "MatTranspose_MPIAIJ"
1641 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1642 {
1643   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1644   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1645   PetscErrorCode ierr;
1646   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1647   PetscInt       cstart=A->cmap->rstart,ncol;
1648   Mat            B;
1649   MatScalar      *array;
1650 
1651   PetscFunctionBegin;
1652   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1653 
1654   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1655   ai = Aloc->i; aj = Aloc->j;
1656   bi = Bloc->i; bj = Bloc->j;
1657   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1658     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1659     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1660     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1661     for (i=0; i<ai[ma]; i++){
1662       d_nnz[aj[i]] ++;
1663       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1664     }
1665 
1666     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1667     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1668     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1669     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1670     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1671   } else {
1672     B = *matout;
1673   }
1674 
1675   /* copy over the A part */
1676   array = Aloc->a;
1677   row = A->rmap->rstart;
1678   for (i=0; i<ma; i++) {
1679     ncol = ai[i+1]-ai[i];
1680     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1681     row++; array += ncol; aj += ncol;
1682   }
1683   aj = Aloc->j;
1684   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1685 
1686   /* copy over the B part */
1687   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1688   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1689   array = Bloc->a;
1690   row = A->rmap->rstart;
1691   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1692   cols_tmp = cols;
1693   for (i=0; i<mb; i++) {
1694     ncol = bi[i+1]-bi[i];
1695     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1696     row++; array += ncol; cols_tmp += ncol;
1697   }
1698   ierr = PetscFree(cols);CHKERRQ(ierr);
1699 
1700   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1702   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1703     *matout = B;
1704   } else {
1705     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1706   }
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1712 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1713 {
1714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1715   Mat            a = aij->A,b = aij->B;
1716   PetscErrorCode ierr;
1717   PetscInt       s1,s2,s3;
1718 
1719   PetscFunctionBegin;
1720   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1721   if (rr) {
1722     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1723     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1724     /* Overlap communication with computation. */
1725     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1726   }
1727   if (ll) {
1728     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1729     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1730     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1731   }
1732   /* scale  the diagonal block */
1733   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1734 
1735   if (rr) {
1736     /* Do a scatter end and then right scale the off-diagonal block */
1737     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1738     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1739   }
1740 
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1746 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1747 {
1748   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1749   PetscErrorCode ierr;
1750 
1751   PetscFunctionBegin;
1752   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1753   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1754   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1755   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1756   PetscFunctionReturn(0);
1757 }
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1760 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1761 {
1762   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1763   PetscErrorCode ierr;
1764 
1765   PetscFunctionBegin;
1766   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 #undef __FUNCT__
1771 #define __FUNCT__ "MatEqual_MPIAIJ"
1772 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1773 {
1774   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1775   Mat            a,b,c,d;
1776   PetscTruth     flg;
1777   PetscErrorCode ierr;
1778 
1779   PetscFunctionBegin;
1780   a = matA->A; b = matA->B;
1781   c = matB->A; d = matB->B;
1782 
1783   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1784   if (flg) {
1785     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1786   }
1787   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1788   PetscFunctionReturn(0);
1789 }
1790 
1791 #undef __FUNCT__
1792 #define __FUNCT__ "MatCopy_MPIAIJ"
1793 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1794 {
1795   PetscErrorCode ierr;
1796   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1797   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1798 
1799   PetscFunctionBegin;
1800   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1801   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1802     /* because of the column compression in the off-processor part of the matrix a->B,
1803        the number of columns in a->B and b->B may be different, hence we cannot call
1804        the MatCopy() directly on the two parts. If need be, we can provide a more
1805        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1806        then copying the submatrices */
1807     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1808   } else {
1809     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1810     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1811   }
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1817 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1818 {
1819   PetscErrorCode ierr;
1820 
1821   PetscFunctionBegin;
1822   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 #undef __FUNCT__
1827 #define __FUNCT__ "MatAXPY_MPIAIJ"
1828 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1829 {
1830   PetscErrorCode ierr;
1831   PetscInt       i;
1832   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1833   PetscBLASInt   bnz,one=1;
1834   Mat_SeqAIJ     *x,*y;
1835 
1836   PetscFunctionBegin;
1837   if (str == SAME_NONZERO_PATTERN) {
1838     PetscScalar alpha = a;
1839     x = (Mat_SeqAIJ *)xx->A->data;
1840     y = (Mat_SeqAIJ *)yy->A->data;
1841     bnz = PetscBLASIntCast(x->nz);
1842     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1843     x = (Mat_SeqAIJ *)xx->B->data;
1844     y = (Mat_SeqAIJ *)yy->B->data;
1845     bnz = PetscBLASIntCast(x->nz);
1846     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1847   } else if (str == SUBSET_NONZERO_PATTERN) {
1848     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1849 
1850     x = (Mat_SeqAIJ *)xx->B->data;
1851     y = (Mat_SeqAIJ *)yy->B->data;
1852     if (y->xtoy && y->XtoY != xx->B) {
1853       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1854       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1855     }
1856     if (!y->xtoy) { /* get xtoy */
1857       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1858       y->XtoY = xx->B;
1859       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1860     }
1861     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1862   } else {
1863     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1864   }
1865   PetscFunctionReturn(0);
1866 }
1867 
1868 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatConjugate_MPIAIJ"
1872 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1873 {
1874 #if defined(PETSC_USE_COMPLEX)
1875   PetscErrorCode ierr;
1876   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1877 
1878   PetscFunctionBegin;
1879   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1880   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1881 #else
1882   PetscFunctionBegin;
1883 #endif
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 #undef __FUNCT__
1888 #define __FUNCT__ "MatRealPart_MPIAIJ"
1889 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1890 {
1891   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1892   PetscErrorCode ierr;
1893 
1894   PetscFunctionBegin;
1895   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1896   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1902 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1903 {
1904   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1905   PetscErrorCode ierr;
1906 
1907   PetscFunctionBegin;
1908   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1909   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 #ifdef PETSC_HAVE_PBGL
1914 
1915 #include <boost/parallel/mpi/bsp_process_group.hpp>
1916 #include <boost/graph/distributed/ilu_default_graph.hpp>
1917 #include <boost/graph/distributed/ilu_0_block.hpp>
1918 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1919 #include <boost/graph/distributed/petsc/interface.hpp>
1920 #include <boost/multi_array.hpp>
1921 #include <boost/parallel/distributed_property_map->hpp>
1922 
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1925 /*
1926   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1927 */
1928 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1929 {
1930   namespace petsc = boost::distributed::petsc;
1931 
1932   namespace graph_dist = boost::graph::distributed;
1933   using boost::graph::distributed::ilu_default::process_group_type;
1934   using boost::graph::ilu_permuted;
1935 
1936   PetscTruth      row_identity, col_identity;
1937   PetscContainer  c;
1938   PetscInt        m, n, M, N;
1939   PetscErrorCode  ierr;
1940 
1941   PetscFunctionBegin;
1942   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1943   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1944   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1945   if (!row_identity || !col_identity) {
1946     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1947   }
1948 
1949   process_group_type pg;
1950   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1951   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1952   lgraph_type&   level_graph = *lgraph_p;
1953   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1954 
1955   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1956   ilu_permuted(level_graph);
1957 
1958   /* put together the new matrix */
1959   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1960   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1961   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1962   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1963   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1964   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1965   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1966 
1967   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1968   ierr = PetscContainerSetPointer(c, lgraph_p);
1969   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 #undef __FUNCT__
1974 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1975 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1976 {
1977   PetscFunctionBegin;
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 #undef __FUNCT__
1982 #define __FUNCT__ "MatSolve_MPIAIJ"
1983 /*
1984   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1985 */
1986 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1987 {
1988   namespace graph_dist = boost::graph::distributed;
1989 
1990   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1991   lgraph_type*   lgraph_p;
1992   PetscContainer c;
1993   PetscErrorCode ierr;
1994 
1995   PetscFunctionBegin;
1996   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1997   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1998   ierr = VecCopy(b, x);CHKERRQ(ierr);
1999 
2000   PetscScalar* array_x;
2001   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2002   PetscInt sx;
2003   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2004 
2005   PetscScalar* array_b;
2006   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2007   PetscInt sb;
2008   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2009 
2010   lgraph_type&   level_graph = *lgraph_p;
2011   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2012 
2013   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2014   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2015                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2016 
2017   typedef boost::iterator_property_map<array_ref_type::iterator,
2018                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2019   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2020                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2021 
2022   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2023 
2024   PetscFunctionReturn(0);
2025 }
2026 #endif
2027 
2028 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2029   PetscInt       nzlocal,nsends,nrecvs;
2030   PetscMPIInt    *send_rank,*recv_rank;
2031   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2032   PetscScalar    *sbuf_a,**rbuf_a;
2033   PetscErrorCode (*MatDestroy)(Mat);
2034 } Mat_Redundant;
2035 
2036 #undef __FUNCT__
2037 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2038 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2039 {
2040   PetscErrorCode       ierr;
2041   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2042   PetscInt             i;
2043 
2044   PetscFunctionBegin;
2045   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2046   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2047   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2048   for (i=0; i<redund->nrecvs; i++){
2049     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2050     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2051   }
2052   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2053   ierr = PetscFree(redund);CHKERRQ(ierr);
2054   PetscFunctionReturn(0);
2055 }
2056 
2057 #undef __FUNCT__
2058 #define __FUNCT__ "MatDestroy_MatRedundant"
2059 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2060 {
2061   PetscErrorCode  ierr;
2062   PetscContainer  container;
2063   Mat_Redundant   *redund=PETSC_NULL;
2064 
2065   PetscFunctionBegin;
2066   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2067   if (container) {
2068     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2069   } else {
2070     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2071   }
2072   A->ops->destroy = redund->MatDestroy;
2073   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2074   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2075   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2081 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2082 {
2083   PetscMPIInt    rank,size;
2084   MPI_Comm       comm=((PetscObject)mat)->comm;
2085   PetscErrorCode ierr;
2086   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2087   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2088   PetscInt       *rowrange=mat->rmap->range;
2089   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2090   Mat            A=aij->A,B=aij->B,C=*matredundant;
2091   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2092   PetscScalar    *sbuf_a;
2093   PetscInt       nzlocal=a->nz+b->nz;
2094   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2095   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2096   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2097   MatScalar      *aworkA,*aworkB;
2098   PetscScalar    *vals;
2099   PetscMPIInt    tag1,tag2,tag3,imdex;
2100   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2101                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2102   MPI_Status     recv_status,*send_status;
2103   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2104   PetscInt       **rbuf_j=PETSC_NULL;
2105   PetscScalar    **rbuf_a=PETSC_NULL;
2106   Mat_Redundant  *redund=PETSC_NULL;
2107   PetscContainer container;
2108 
2109   PetscFunctionBegin;
2110   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2111   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2112 
2113   if (reuse == MAT_REUSE_MATRIX) {
2114     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2115     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2116     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2117     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2118     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2119     if (container) {
2120       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2121     } else {
2122       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2123     }
2124     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2125 
2126     nsends    = redund->nsends;
2127     nrecvs    = redund->nrecvs;
2128     send_rank = redund->send_rank;
2129     recv_rank = redund->recv_rank;
2130     sbuf_nz   = redund->sbuf_nz;
2131     rbuf_nz   = redund->rbuf_nz;
2132     sbuf_j    = redund->sbuf_j;
2133     sbuf_a    = redund->sbuf_a;
2134     rbuf_j    = redund->rbuf_j;
2135     rbuf_a    = redund->rbuf_a;
2136   }
2137 
2138   if (reuse == MAT_INITIAL_MATRIX){
2139     PetscMPIInt  subrank,subsize;
2140     PetscInt     nleftover,np_subcomm;
2141     /* get the destination processors' id send_rank, nsends and nrecvs */
2142     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2143     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2144     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2145     np_subcomm = size/nsubcomm;
2146     nleftover  = size - nsubcomm*np_subcomm;
2147     nsends = 0; nrecvs = 0;
2148     for (i=0; i<size; i++){ /* i=rank*/
2149       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2150         send_rank[nsends] = i; nsends++;
2151         recv_rank[nrecvs++] = i;
2152       }
2153     }
2154     if (rank >= size - nleftover){/* this proc is a leftover processor */
2155       i = size-nleftover-1;
2156       j = 0;
2157       while (j < nsubcomm - nleftover){
2158         send_rank[nsends++] = i;
2159         i--; j++;
2160       }
2161     }
2162 
2163     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2164       for (i=0; i<nleftover; i++){
2165         recv_rank[nrecvs++] = size-nleftover+i;
2166       }
2167     }
2168 
2169     /* allocate sbuf_j, sbuf_a */
2170     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2171     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2172     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2173   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2174 
2175   /* copy mat's local entries into the buffers */
2176   if (reuse == MAT_INITIAL_MATRIX){
2177     rownz_max = 0;
2178     rptr = sbuf_j;
2179     cols = sbuf_j + rend-rstart + 1;
2180     vals = sbuf_a;
2181     rptr[0] = 0;
2182     for (i=0; i<rend-rstart; i++){
2183       row = i + rstart;
2184       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2185       ncols  = nzA + nzB;
2186       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2187       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2188       /* load the column indices for this row into cols */
2189       lwrite = 0;
2190       for (l=0; l<nzB; l++) {
2191         if ((ctmp = bmap[cworkB[l]]) < cstart){
2192           vals[lwrite]   = aworkB[l];
2193           cols[lwrite++] = ctmp;
2194         }
2195       }
2196       for (l=0; l<nzA; l++){
2197         vals[lwrite]   = aworkA[l];
2198         cols[lwrite++] = cstart + cworkA[l];
2199       }
2200       for (l=0; l<nzB; l++) {
2201         if ((ctmp = bmap[cworkB[l]]) >= cend){
2202           vals[lwrite]   = aworkB[l];
2203           cols[lwrite++] = ctmp;
2204         }
2205       }
2206       vals += ncols;
2207       cols += ncols;
2208       rptr[i+1] = rptr[i] + ncols;
2209       if (rownz_max < ncols) rownz_max = ncols;
2210     }
2211     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2212   } else { /* only copy matrix values into sbuf_a */
2213     rptr = sbuf_j;
2214     vals = sbuf_a;
2215     rptr[0] = 0;
2216     for (i=0; i<rend-rstart; i++){
2217       row = i + rstart;
2218       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2219       ncols  = nzA + nzB;
2220       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2221       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2222       lwrite = 0;
2223       for (l=0; l<nzB; l++) {
2224         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2225       }
2226       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2227       for (l=0; l<nzB; l++) {
2228         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2229       }
2230       vals += ncols;
2231       rptr[i+1] = rptr[i] + ncols;
2232     }
2233   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2234 
2235   /* send nzlocal to others, and recv other's nzlocal */
2236   /*--------------------------------------------------*/
2237   if (reuse == MAT_INITIAL_MATRIX){
2238     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2239     s_waits2 = s_waits3 + nsends;
2240     s_waits1 = s_waits2 + nsends;
2241     r_waits1 = s_waits1 + nsends;
2242     r_waits2 = r_waits1 + nrecvs;
2243     r_waits3 = r_waits2 + nrecvs;
2244   } else {
2245     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2246     r_waits3 = s_waits3 + nsends;
2247   }
2248 
2249   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2250   if (reuse == MAT_INITIAL_MATRIX){
2251     /* get new tags to keep the communication clean */
2252     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2253     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2254     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2255 
2256     /* post receives of other's nzlocal */
2257     for (i=0; i<nrecvs; i++){
2258       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2259     }
2260     /* send nzlocal to others */
2261     for (i=0; i<nsends; i++){
2262       sbuf_nz[i] = nzlocal;
2263       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2264     }
2265     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2266     count = nrecvs;
2267     while (count) {
2268       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2269       recv_rank[imdex] = recv_status.MPI_SOURCE;
2270       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2271       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2272 
2273       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2274       rbuf_nz[imdex] += i + 2;
2275       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2276       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2277       count--;
2278     }
2279     /* wait on sends of nzlocal */
2280     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2281     /* send mat->i,j to others, and recv from other's */
2282     /*------------------------------------------------*/
2283     for (i=0; i<nsends; i++){
2284       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2285       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2286     }
2287     /* wait on receives of mat->i,j */
2288     /*------------------------------*/
2289     count = nrecvs;
2290     while (count) {
2291       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2292       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2293       count--;
2294     }
2295     /* wait on sends of mat->i,j */
2296     /*---------------------------*/
2297     if (nsends) {
2298       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2299     }
2300   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2301 
2302   /* post receives, send and receive mat->a */
2303   /*----------------------------------------*/
2304   for (imdex=0; imdex<nrecvs; imdex++) {
2305     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2306   }
2307   for (i=0; i<nsends; i++){
2308     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2309   }
2310   count = nrecvs;
2311   while (count) {
2312     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2313     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2314     count--;
2315   }
2316   if (nsends) {
2317     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2318   }
2319 
2320   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2321 
2322   /* create redundant matrix */
2323   /*-------------------------*/
2324   if (reuse == MAT_INITIAL_MATRIX){
2325     /* compute rownz_max for preallocation */
2326     for (imdex=0; imdex<nrecvs; imdex++){
2327       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2328       rptr = rbuf_j[imdex];
2329       for (i=0; i<j; i++){
2330         ncols = rptr[i+1] - rptr[i];
2331         if (rownz_max < ncols) rownz_max = ncols;
2332       }
2333     }
2334 
2335     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2336     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2337     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2338     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2339     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2340   } else {
2341     C = *matredundant;
2342   }
2343 
2344   /* insert local matrix entries */
2345   rptr = sbuf_j;
2346   cols = sbuf_j + rend-rstart + 1;
2347   vals = sbuf_a;
2348   for (i=0; i<rend-rstart; i++){
2349     row   = i + rstart;
2350     ncols = rptr[i+1] - rptr[i];
2351     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2352     vals += ncols;
2353     cols += ncols;
2354   }
2355   /* insert received matrix entries */
2356   for (imdex=0; imdex<nrecvs; imdex++){
2357     rstart = rowrange[recv_rank[imdex]];
2358     rend   = rowrange[recv_rank[imdex]+1];
2359     rptr = rbuf_j[imdex];
2360     cols = rbuf_j[imdex] + rend-rstart + 1;
2361     vals = rbuf_a[imdex];
2362     for (i=0; i<rend-rstart; i++){
2363       row   = i + rstart;
2364       ncols = rptr[i+1] - rptr[i];
2365       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2366       vals += ncols;
2367       cols += ncols;
2368     }
2369   }
2370   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2372   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2373   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2374   if (reuse == MAT_INITIAL_MATRIX){
2375     PetscContainer container;
2376     *matredundant = C;
2377     /* create a supporting struct and attach it to C for reuse */
2378     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2379     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2380     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2381     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2382     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2383 
2384     redund->nzlocal = nzlocal;
2385     redund->nsends  = nsends;
2386     redund->nrecvs  = nrecvs;
2387     redund->send_rank = send_rank;
2388     redund->recv_rank = recv_rank;
2389     redund->sbuf_nz = sbuf_nz;
2390     redund->rbuf_nz = rbuf_nz;
2391     redund->sbuf_j  = sbuf_j;
2392     redund->sbuf_a  = sbuf_a;
2393     redund->rbuf_j  = rbuf_j;
2394     redund->rbuf_a  = rbuf_a;
2395 
2396     redund->MatDestroy = C->ops->destroy;
2397     C->ops->destroy    = MatDestroy_MatRedundant;
2398   }
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2404 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2405 {
2406   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2407   PetscErrorCode ierr;
2408   PetscInt       i,*idxb = 0;
2409   PetscScalar    *va,*vb;
2410   Vec            vtmp;
2411 
2412   PetscFunctionBegin;
2413   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2414   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2415   if (idx) {
2416     for (i=0; i<A->rmap->n; i++) {
2417       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2418     }
2419   }
2420 
2421   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2422   if (idx) {
2423     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2424   }
2425   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2426   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2427 
2428   for (i=0; i<A->rmap->n; i++){
2429     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2430       va[i] = vb[i];
2431       if (idx) idx[i] = a->garray[idxb[i]];
2432     }
2433   }
2434 
2435   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2436   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2437   if (idxb) {
2438     ierr = PetscFree(idxb);CHKERRQ(ierr);
2439   }
2440   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2441   PetscFunctionReturn(0);
2442 }
2443 
2444 #undef __FUNCT__
2445 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2446 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2447 {
2448   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2449   PetscErrorCode ierr;
2450   PetscInt       i,*idxb = 0;
2451   PetscScalar    *va,*vb;
2452   Vec            vtmp;
2453 
2454   PetscFunctionBegin;
2455   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2456   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2457   if (idx) {
2458     for (i=0; i<A->cmap->n; i++) {
2459       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2460     }
2461   }
2462 
2463   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2464   if (idx) {
2465     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2466   }
2467   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2468   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2469 
2470   for (i=0; i<A->rmap->n; i++){
2471     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2472       va[i] = vb[i];
2473       if (idx) idx[i] = a->garray[idxb[i]];
2474     }
2475   }
2476 
2477   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2478   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2479   if (idxb) {
2480     ierr = PetscFree(idxb);CHKERRQ(ierr);
2481   }
2482   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2488 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2489 {
2490   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2491   PetscInt       n      = A->rmap->n;
2492   PetscInt       cstart = A->cmap->rstart;
2493   PetscInt      *cmap   = mat->garray;
2494   PetscInt      *diagIdx, *offdiagIdx;
2495   Vec            diagV, offdiagV;
2496   PetscScalar   *a, *diagA, *offdiagA;
2497   PetscInt       r;
2498   PetscErrorCode ierr;
2499 
2500   PetscFunctionBegin;
2501   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2502   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2503   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2504   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2505   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2506   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2507   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2508   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2509   for(r = 0; r < n; ++r) {
2510     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2511       a[r]   = diagA[r];
2512       idx[r] = cstart + diagIdx[r];
2513     } else {
2514       a[r]   = offdiagA[r];
2515       idx[r] = cmap[offdiagIdx[r]];
2516     }
2517   }
2518   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2519   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2520   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2521   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2522   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2523   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2524   PetscFunctionReturn(0);
2525 }
2526 
2527 #undef __FUNCT__
2528 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2529 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2530 {
2531   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2532   PetscInt       n      = A->rmap->n;
2533   PetscInt       cstart = A->cmap->rstart;
2534   PetscInt      *cmap   = mat->garray;
2535   PetscInt      *diagIdx, *offdiagIdx;
2536   Vec            diagV, offdiagV;
2537   PetscScalar   *a, *diagA, *offdiagA;
2538   PetscInt       r;
2539   PetscErrorCode ierr;
2540 
2541   PetscFunctionBegin;
2542   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2543   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2544   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2545   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2546   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2547   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2548   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2549   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2550   for(r = 0; r < n; ++r) {
2551     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2552       a[r]   = diagA[r];
2553       idx[r] = cstart + diagIdx[r];
2554     } else {
2555       a[r]   = offdiagA[r];
2556       idx[r] = cmap[offdiagIdx[r]];
2557     }
2558   }
2559   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2560   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2561   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2562   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2563   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2564   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 #undef __FUNCT__
2569 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2570 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2571 {
2572   PetscErrorCode ierr;
2573   Mat            *dummy;
2574 
2575   PetscFunctionBegin;
2576   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2577   *newmat = *dummy;
2578   ierr = PetscFree(dummy);CHKERRQ(ierr);
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2583 /* -------------------------------------------------------------------*/
2584 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2585        MatGetRow_MPIAIJ,
2586        MatRestoreRow_MPIAIJ,
2587        MatMult_MPIAIJ,
2588 /* 4*/ MatMultAdd_MPIAIJ,
2589        MatMultTranspose_MPIAIJ,
2590        MatMultTransposeAdd_MPIAIJ,
2591 #ifdef PETSC_HAVE_PBGL
2592        MatSolve_MPIAIJ,
2593 #else
2594        0,
2595 #endif
2596        0,
2597        0,
2598 /*10*/ 0,
2599        0,
2600        0,
2601        MatSOR_MPIAIJ,
2602        MatTranspose_MPIAIJ,
2603 /*15*/ MatGetInfo_MPIAIJ,
2604        MatEqual_MPIAIJ,
2605        MatGetDiagonal_MPIAIJ,
2606        MatDiagonalScale_MPIAIJ,
2607        MatNorm_MPIAIJ,
2608 /*20*/ MatAssemblyBegin_MPIAIJ,
2609        MatAssemblyEnd_MPIAIJ,
2610        MatSetOption_MPIAIJ,
2611        MatZeroEntries_MPIAIJ,
2612 /*24*/ MatZeroRows_MPIAIJ,
2613        0,
2614 #ifdef PETSC_HAVE_PBGL
2615        0,
2616 #else
2617        0,
2618 #endif
2619        0,
2620        0,
2621 /*29*/ MatSetUpPreallocation_MPIAIJ,
2622 #ifdef PETSC_HAVE_PBGL
2623        0,
2624 #else
2625        0,
2626 #endif
2627        0,
2628        0,
2629        0,
2630 /*34*/ MatDuplicate_MPIAIJ,
2631        0,
2632        0,
2633        0,
2634        0,
2635 /*39*/ MatAXPY_MPIAIJ,
2636        MatGetSubMatrices_MPIAIJ,
2637        MatIncreaseOverlap_MPIAIJ,
2638        MatGetValues_MPIAIJ,
2639        MatCopy_MPIAIJ,
2640 /*44*/ MatGetRowMax_MPIAIJ,
2641        MatScale_MPIAIJ,
2642        0,
2643        0,
2644        0,
2645 /*49*/ MatSetBlockSize_MPIAIJ,
2646        0,
2647        0,
2648        0,
2649        0,
2650 /*54*/ MatFDColoringCreate_MPIAIJ,
2651        0,
2652        MatSetUnfactored_MPIAIJ,
2653        MatPermute_MPIAIJ,
2654        0,
2655 /*59*/ MatGetSubMatrix_MPIAIJ,
2656        MatDestroy_MPIAIJ,
2657        MatView_MPIAIJ,
2658        0,
2659        0,
2660 /*64*/ 0,
2661        0,
2662        0,
2663        0,
2664        0,
2665 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2666        MatGetRowMinAbs_MPIAIJ,
2667        0,
2668        MatSetColoring_MPIAIJ,
2669 #if defined(PETSC_HAVE_ADIC)
2670        MatSetValuesAdic_MPIAIJ,
2671 #else
2672        0,
2673 #endif
2674        MatSetValuesAdifor_MPIAIJ,
2675 /*75*/ MatFDColoringApply_AIJ,
2676        0,
2677        0,
2678        0,
2679        0,
2680 /*80*/ 0,
2681        0,
2682        0,
2683 /*83*/ MatLoad_MPIAIJ,
2684        0,
2685        0,
2686        0,
2687        0,
2688        0,
2689 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2690        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2691        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2692        MatPtAP_Basic,
2693        MatPtAPSymbolic_MPIAIJ,
2694 /*94*/ MatPtAPNumeric_MPIAIJ,
2695        0,
2696        0,
2697        0,
2698        0,
2699 /*99*/ 0,
2700        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2701        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2702        MatConjugate_MPIAIJ,
2703        0,
2704 /*104*/MatSetValuesRow_MPIAIJ,
2705        MatRealPart_MPIAIJ,
2706        MatImaginaryPart_MPIAIJ,
2707        0,
2708        0,
2709 /*109*/0,
2710        MatGetRedundantMatrix_MPIAIJ,
2711        MatGetRowMin_MPIAIJ,
2712        0,
2713        0,
2714 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2715        0,
2716        0,
2717        0,
2718        0,
2719 /*119*/0,
2720        0,
2721        0,
2722        0,
2723        MatGetMultiProcBlock_MPIAIJ
2724 };
2725 
2726 /* ----------------------------------------------------------------------------------------*/
2727 
2728 EXTERN_C_BEGIN
2729 #undef __FUNCT__
2730 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2731 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2732 {
2733   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2734   PetscErrorCode ierr;
2735 
2736   PetscFunctionBegin;
2737   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2738   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2739   PetscFunctionReturn(0);
2740 }
2741 EXTERN_C_END
2742 
2743 EXTERN_C_BEGIN
2744 #undef __FUNCT__
2745 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2746 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2747 {
2748   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2749   PetscErrorCode ierr;
2750 
2751   PetscFunctionBegin;
2752   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2753   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2754   PetscFunctionReturn(0);
2755 }
2756 EXTERN_C_END
2757 
2758 EXTERN_C_BEGIN
2759 #undef __FUNCT__
2760 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2761 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2762 {
2763   Mat_MPIAIJ     *b;
2764   PetscErrorCode ierr;
2765   PetscInt       i;
2766 
2767   PetscFunctionBegin;
2768   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2769   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2770   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2771   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2772 
2773   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2774   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2775   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2776   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2777   if (d_nnz) {
2778     for (i=0; i<B->rmap->n; i++) {
2779       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2780     }
2781   }
2782   if (o_nnz) {
2783     for (i=0; i<B->rmap->n; i++) {
2784       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2785     }
2786   }
2787   b = (Mat_MPIAIJ*)B->data;
2788 
2789   if (!B->preallocated) {
2790     /* Explicitly create 2 MATSEQAIJ matrices. */
2791     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2792     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2793     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2794     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2795     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2796     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2797     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2798     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2799   }
2800 
2801   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2802   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2803   B->preallocated = PETSC_TRUE;
2804   PetscFunctionReturn(0);
2805 }
2806 EXTERN_C_END
2807 
2808 #undef __FUNCT__
2809 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2810 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2811 {
2812   Mat            mat;
2813   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2814   PetscErrorCode ierr;
2815 
2816   PetscFunctionBegin;
2817   *newmat       = 0;
2818   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2819   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2820   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2821   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2822   a    = (Mat_MPIAIJ*)mat->data;
2823 
2824   mat->factortype    = matin->factortype;
2825   mat->rmap->bs      = matin->rmap->bs;
2826   mat->assembled    = PETSC_TRUE;
2827   mat->insertmode   = NOT_SET_VALUES;
2828   mat->preallocated = PETSC_TRUE;
2829 
2830   a->size           = oldmat->size;
2831   a->rank           = oldmat->rank;
2832   a->donotstash     = oldmat->donotstash;
2833   a->roworiented    = oldmat->roworiented;
2834   a->rowindices     = 0;
2835   a->rowvalues      = 0;
2836   a->getrowactive   = PETSC_FALSE;
2837 
2838   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2839   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2840 
2841   if (oldmat->colmap) {
2842 #if defined (PETSC_USE_CTABLE)
2843     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2844 #else
2845     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2846     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2847     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2848 #endif
2849   } else a->colmap = 0;
2850   if (oldmat->garray) {
2851     PetscInt len;
2852     len  = oldmat->B->cmap->n;
2853     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2854     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2855     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2856   } else a->garray = 0;
2857 
2858   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2859   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2860   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2861   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2862   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2863   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2864   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2865   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2866   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2867   *newmat = mat;
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 /*
2872     Allows sending/receiving larger messages then 2 gigabytes in a single call
2873 */
2874 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
2875 {
2876   int             ierr;
2877   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2878   PetscInt        i,numchunks;
2879   PetscMPIInt     icnt;
2880 
2881   numchunks = cnt/CHUNKSIZE + 1;
2882   for (i=0; i<numchunks; i++) {
2883     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2884     ierr = MPI_Send(mess,icnt,type,to,tag,comm);
2885     if (type == MPIU_INT) {
2886       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2887     } else if (type == MPIU_SCALAR) {
2888       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2889     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2890   }
2891   return 0;
2892 }
2893 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
2894 {
2895   int             ierr;
2896   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2897   MPI_Status      status;
2898   PetscInt        i,numchunks;
2899   PetscMPIInt     icnt;
2900 
2901   numchunks = cnt/CHUNKSIZE + 1;
2902   for (i=0; i<numchunks; i++) {
2903     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2904     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);
2905     if (type == MPIU_INT) {
2906       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2907     } else if (type == MPIU_SCALAR) {
2908       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2909     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2910   }
2911   return 0;
2912 }
2913 
2914 #undef __FUNCT__
2915 #define __FUNCT__ "MatLoad_MPIAIJ"
2916 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2917 {
2918   PetscScalar    *vals,*svals;
2919   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2920   PetscErrorCode ierr;
2921   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2922   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2923   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2924   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2925   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2926   int            fd;
2927 
2928   PetscFunctionBegin;
2929   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2930   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2931   if (!rank) {
2932     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2933     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2934     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2935   }
2936 
2937   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2938 
2939   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2940   M = header[1]; N = header[2];
2941   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2942   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2943   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2944 
2945   /* If global sizes are set, check if they are consistent with that given in the file */
2946   if (sizesset) {
2947     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2948   }
2949   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2950   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2951 
2952   /* determine ownership of all rows */
2953   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2954   else m = newMat->rmap->n; /* Set by user */
2955 
2956   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2957   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2958 
2959   /* First process needs enough room for process with most rows */
2960   if (!rank) {
2961     mmax       = rowners[1];
2962     for (i=2; i<size; i++) {
2963       mmax = PetscMax(mmax,rowners[i]);
2964     }
2965   } else mmax = m;
2966 
2967   rowners[0] = 0;
2968   for (i=2; i<=size; i++) {
2969     rowners[i] += rowners[i-1];
2970   }
2971   rstart = rowners[rank];
2972   rend   = rowners[rank+1];
2973 
2974   /* distribute row lengths to all processors */
2975   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2976   if (!rank) {
2977     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2978     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2979     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2980     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2981     for (j=0; j<m; j++) {
2982       procsnz[0] += ourlens[j];
2983     }
2984     for (i=1; i<size; i++) {
2985       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2986       /* calculate the number of nonzeros on each processor */
2987       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2988         procsnz[i] += rowlengths[j];
2989       }
2990       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2991     }
2992     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2993   } else {
2994     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2995   }
2996 
2997   if (!rank) {
2998     /* determine max buffer needed and allocate it */
2999     maxnz = 0;
3000     for (i=0; i<size; i++) {
3001       maxnz = PetscMax(maxnz,procsnz[i]);
3002     }
3003     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3004 
3005     /* read in my part of the matrix column indices  */
3006     nz   = procsnz[0];
3007     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3008     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3009 
3010     /* read in every one elses and ship off */
3011     for (i=1; i<size; i++) {
3012       nz     = procsnz[i];
3013       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3014       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3015     }
3016     ierr = PetscFree(cols);CHKERRQ(ierr);
3017   } else {
3018     /* determine buffer space needed for message */
3019     nz = 0;
3020     for (i=0; i<m; i++) {
3021       nz += ourlens[i];
3022     }
3023     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3024 
3025     /* receive message of column indices*/
3026     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3027   }
3028 
3029   /* determine column ownership if matrix is not square */
3030   if (N != M) {
3031     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3032     else n = newMat->cmap->n;
3033     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3034     cstart = cend - n;
3035   } else {
3036     cstart = rstart;
3037     cend   = rend;
3038     n      = cend - cstart;
3039   }
3040 
3041   /* loop over local rows, determining number of off diagonal entries */
3042   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3043   jj = 0;
3044   for (i=0; i<m; i++) {
3045     for (j=0; j<ourlens[i]; j++) {
3046       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3047       jj++;
3048     }
3049   }
3050 
3051   for (i=0; i<m; i++) {
3052     ourlens[i] -= offlens[i];
3053   }
3054   if (!sizesset) {
3055     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3056   }
3057   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3058 
3059   for (i=0; i<m; i++) {
3060     ourlens[i] += offlens[i];
3061   }
3062 
3063   if (!rank) {
3064     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3065 
3066     /* read in my part of the matrix numerical values  */
3067     nz   = procsnz[0];
3068     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3069 
3070     /* insert into matrix */
3071     jj      = rstart;
3072     smycols = mycols;
3073     svals   = vals;
3074     for (i=0; i<m; i++) {
3075       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3076       smycols += ourlens[i];
3077       svals   += ourlens[i];
3078       jj++;
3079     }
3080 
3081     /* read in other processors and ship out */
3082     for (i=1; i<size; i++) {
3083       nz     = procsnz[i];
3084       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3085       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3086     }
3087     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3088   } else {
3089     /* receive numeric values */
3090     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3091 
3092     /* receive message of values*/
3093     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3094 
3095     /* insert into matrix */
3096     jj      = rstart;
3097     smycols = mycols;
3098     svals   = vals;
3099     for (i=0; i<m; i++) {
3100       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3101       smycols += ourlens[i];
3102       svals   += ourlens[i];
3103       jj++;
3104     }
3105   }
3106   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3107   ierr = PetscFree(vals);CHKERRQ(ierr);
3108   ierr = PetscFree(mycols);CHKERRQ(ierr);
3109   ierr = PetscFree(rowners);CHKERRQ(ierr);
3110 
3111   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3112   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3113   PetscFunctionReturn(0);
3114 }
3115 
3116 #undef __FUNCT__
3117 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3118 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3119 {
3120   PetscErrorCode ierr;
3121   IS             iscol_local;
3122   PetscInt       csize;
3123 
3124   PetscFunctionBegin;
3125   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3126   if (call == MAT_REUSE_MATRIX) {
3127     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3128     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3129   } else {
3130     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3131   }
3132   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3133   if (call == MAT_INITIAL_MATRIX) {
3134     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3135     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3136   }
3137   PetscFunctionReturn(0);
3138 }
3139 
3140 #undef __FUNCT__
3141 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3142 /*
3143     Not great since it makes two copies of the submatrix, first an SeqAIJ
3144   in local and then by concatenating the local matrices the end result.
3145   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3146 
3147   Note: This requires a sequential iscol with all indices.
3148 */
3149 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3150 {
3151   PetscErrorCode ierr;
3152   PetscMPIInt    rank,size;
3153   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3154   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3155   Mat            *local,M,Mreuse;
3156   MatScalar      *vwork,*aa;
3157   MPI_Comm       comm = ((PetscObject)mat)->comm;
3158   Mat_SeqAIJ     *aij;
3159 
3160 
3161   PetscFunctionBegin;
3162   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3163   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3164 
3165   if (call ==  MAT_REUSE_MATRIX) {
3166     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3167     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3168     local = &Mreuse;
3169     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3170   } else {
3171     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3172     Mreuse = *local;
3173     ierr   = PetscFree(local);CHKERRQ(ierr);
3174   }
3175 
3176   /*
3177       m - number of local rows
3178       n - number of columns (same on all processors)
3179       rstart - first row in new global matrix generated
3180   */
3181   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3182   if (call == MAT_INITIAL_MATRIX) {
3183     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3184     ii  = aij->i;
3185     jj  = aij->j;
3186 
3187     /*
3188         Determine the number of non-zeros in the diagonal and off-diagonal
3189         portions of the matrix in order to do correct preallocation
3190     */
3191 
3192     /* first get start and end of "diagonal" columns */
3193     if (csize == PETSC_DECIDE) {
3194       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3195       if (mglobal == n) { /* square matrix */
3196 	nlocal = m;
3197       } else {
3198         nlocal = n/size + ((n % size) > rank);
3199       }
3200     } else {
3201       nlocal = csize;
3202     }
3203     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3204     rstart = rend - nlocal;
3205     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3206 
3207     /* next, compute all the lengths */
3208     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3209     olens = dlens + m;
3210     for (i=0; i<m; i++) {
3211       jend = ii[i+1] - ii[i];
3212       olen = 0;
3213       dlen = 0;
3214       for (j=0; j<jend; j++) {
3215         if (*jj < rstart || *jj >= rend) olen++;
3216         else dlen++;
3217         jj++;
3218       }
3219       olens[i] = olen;
3220       dlens[i] = dlen;
3221     }
3222     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3223     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3224     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3225     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3226     ierr = PetscFree(dlens);CHKERRQ(ierr);
3227   } else {
3228     PetscInt ml,nl;
3229 
3230     M = *newmat;
3231     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3232     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3233     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3234     /*
3235          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3236        rather than the slower MatSetValues().
3237     */
3238     M->was_assembled = PETSC_TRUE;
3239     M->assembled     = PETSC_FALSE;
3240   }
3241   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3242   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3243   ii  = aij->i;
3244   jj  = aij->j;
3245   aa  = aij->a;
3246   for (i=0; i<m; i++) {
3247     row   = rstart + i;
3248     nz    = ii[i+1] - ii[i];
3249     cwork = jj;     jj += nz;
3250     vwork = aa;     aa += nz;
3251     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3252   }
3253 
3254   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3255   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3256   *newmat = M;
3257 
3258   /* save submatrix used in processor for next request */
3259   if (call ==  MAT_INITIAL_MATRIX) {
3260     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3261     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3262   }
3263 
3264   PetscFunctionReturn(0);
3265 }
3266 
3267 EXTERN_C_BEGIN
3268 #undef __FUNCT__
3269 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3270 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3271 {
3272   PetscInt       m,cstart, cend,j,nnz,i,d;
3273   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3274   const PetscInt *JJ;
3275   PetscScalar    *values;
3276   PetscErrorCode ierr;
3277 
3278   PetscFunctionBegin;
3279   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3280 
3281   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3282   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3283   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3284   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3285   m      = B->rmap->n;
3286   cstart = B->cmap->rstart;
3287   cend   = B->cmap->rend;
3288   rstart = B->rmap->rstart;
3289 
3290   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3291 
3292 #if defined(PETSC_USE_DEBUGGING)
3293   for (i=0; i<m; i++) {
3294     nnz     = Ii[i+1]- Ii[i];
3295     JJ      = J + Ii[i];
3296     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3297     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3298     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3299   }
3300 #endif
3301 
3302   for (i=0; i<m; i++) {
3303     nnz     = Ii[i+1]- Ii[i];
3304     JJ      = J + Ii[i];
3305     nnz_max = PetscMax(nnz_max,nnz);
3306     d       = 0;
3307     for (j=0; j<nnz; j++) {
3308       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3309     }
3310     d_nnz[i] = d;
3311     o_nnz[i] = nnz - d;
3312   }
3313   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3314   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3315 
3316   if (v) values = (PetscScalar*)v;
3317   else {
3318     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3319     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3320   }
3321 
3322   for (i=0; i<m; i++) {
3323     ii   = i + rstart;
3324     nnz  = Ii[i+1]- Ii[i];
3325     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3326   }
3327   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3329 
3330   if (!v) {
3331     ierr = PetscFree(values);CHKERRQ(ierr);
3332   }
3333   PetscFunctionReturn(0);
3334 }
3335 EXTERN_C_END
3336 
3337 #undef __FUNCT__
3338 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3339 /*@
3340    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3341    (the default parallel PETSc format).
3342 
3343    Collective on MPI_Comm
3344 
3345    Input Parameters:
3346 +  B - the matrix
3347 .  i - the indices into j for the start of each local row (starts with zero)
3348 .  j - the column indices for each local row (starts with zero)
3349 -  v - optional values in the matrix
3350 
3351    Level: developer
3352 
3353    Notes:
3354        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3355      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3356      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3357 
3358        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3359 
3360        The format which is used for the sparse matrix input, is equivalent to a
3361     row-major ordering.. i.e for the following matrix, the input data expected is
3362     as shown:
3363 
3364         1 0 0
3365         2 0 3     P0
3366        -------
3367         4 5 6     P1
3368 
3369      Process0 [P0]: rows_owned=[0,1]
3370         i =  {0,1,3}  [size = nrow+1  = 2+1]
3371         j =  {0,0,2}  [size = nz = 6]
3372         v =  {1,2,3}  [size = nz = 6]
3373 
3374      Process1 [P1]: rows_owned=[2]
3375         i =  {0,3}    [size = nrow+1  = 1+1]
3376         j =  {0,1,2}  [size = nz = 6]
3377         v =  {4,5,6}  [size = nz = 6]
3378 
3379 .keywords: matrix, aij, compressed row, sparse, parallel
3380 
3381 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3382           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3383 @*/
3384 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3385 {
3386   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3387 
3388   PetscFunctionBegin;
3389   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3390   if (f) {
3391     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3392   }
3393   PetscFunctionReturn(0);
3394 }
3395 
3396 #undef __FUNCT__
3397 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3398 /*@C
3399    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3400    (the default parallel PETSc format).  For good matrix assembly performance
3401    the user should preallocate the matrix storage by setting the parameters
3402    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3403    performance can be increased by more than a factor of 50.
3404 
3405    Collective on MPI_Comm
3406 
3407    Input Parameters:
3408 +  A - the matrix
3409 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3410            (same value is used for all local rows)
3411 .  d_nnz - array containing the number of nonzeros in the various rows of the
3412            DIAGONAL portion of the local submatrix (possibly different for each row)
3413            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3414            The size of this array is equal to the number of local rows, i.e 'm'.
3415            You must leave room for the diagonal entry even if it is zero.
3416 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3417            submatrix (same value is used for all local rows).
3418 -  o_nnz - array containing the number of nonzeros in the various rows of the
3419            OFF-DIAGONAL portion of the local submatrix (possibly different for
3420            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3421            structure. The size of this array is equal to the number
3422            of local rows, i.e 'm'.
3423 
3424    If the *_nnz parameter is given then the *_nz parameter is ignored
3425 
3426    The AIJ format (also called the Yale sparse matrix format or
3427    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3428    storage.  The stored row and column indices begin with zero.
3429    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3430 
3431    The parallel matrix is partitioned such that the first m0 rows belong to
3432    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3433    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3434 
3435    The DIAGONAL portion of the local submatrix of a processor can be defined
3436    as the submatrix which is obtained by extraction the part corresponding to
3437    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3438    first row that belongs to the processor, r2 is the last row belonging to
3439    the this processor, and c1-c2 is range of indices of the local part of a
3440    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3441    common case of a square matrix, the row and column ranges are the same and
3442    the DIAGONAL part is also square. The remaining portion of the local
3443    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3444 
3445    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3446 
3447    You can call MatGetInfo() to get information on how effective the preallocation was;
3448    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3449    You can also run with the option -info and look for messages with the string
3450    malloc in them to see if additional memory allocation was needed.
3451 
3452    Example usage:
3453 
3454    Consider the following 8x8 matrix with 34 non-zero values, that is
3455    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3456    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3457    as follows:
3458 
3459 .vb
3460             1  2  0  |  0  3  0  |  0  4
3461     Proc0   0  5  6  |  7  0  0  |  8  0
3462             9  0 10  | 11  0  0  | 12  0
3463     -------------------------------------
3464            13  0 14  | 15 16 17  |  0  0
3465     Proc1   0 18  0  | 19 20 21  |  0  0
3466             0  0  0  | 22 23  0  | 24  0
3467     -------------------------------------
3468     Proc2  25 26 27  |  0  0 28  | 29  0
3469            30  0  0  | 31 32 33  |  0 34
3470 .ve
3471 
3472    This can be represented as a collection of submatrices as:
3473 
3474 .vb
3475       A B C
3476       D E F
3477       G H I
3478 .ve
3479 
3480    Where the submatrices A,B,C are owned by proc0, D,E,F are
3481    owned by proc1, G,H,I are owned by proc2.
3482 
3483    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3484    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3485    The 'M','N' parameters are 8,8, and have the same values on all procs.
3486 
3487    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3488    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3489    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3490    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3491    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3492    matrix, ans [DF] as another SeqAIJ matrix.
3493 
3494    When d_nz, o_nz parameters are specified, d_nz storage elements are
3495    allocated for every row of the local diagonal submatrix, and o_nz
3496    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3497    One way to choose d_nz and o_nz is to use the max nonzerors per local
3498    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3499    In this case, the values of d_nz,o_nz are:
3500 .vb
3501      proc0 : dnz = 2, o_nz = 2
3502      proc1 : dnz = 3, o_nz = 2
3503      proc2 : dnz = 1, o_nz = 4
3504 .ve
3505    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3506    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3507    for proc3. i.e we are using 12+15+10=37 storage locations to store
3508    34 values.
3509 
3510    When d_nnz, o_nnz parameters are specified, the storage is specified
3511    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3512    In the above case the values for d_nnz,o_nnz are:
3513 .vb
3514      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3515      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3516      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3517 .ve
3518    Here the space allocated is sum of all the above values i.e 34, and
3519    hence pre-allocation is perfect.
3520 
3521    Level: intermediate
3522 
3523 .keywords: matrix, aij, compressed row, sparse, parallel
3524 
3525 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3526           MPIAIJ, MatGetInfo()
3527 @*/
3528 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3529 {
3530   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3531 
3532   PetscFunctionBegin;
3533   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3534   if (f) {
3535     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3536   }
3537   PetscFunctionReturn(0);
3538 }
3539 
3540 #undef __FUNCT__
3541 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3542 /*@
3543      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3544          CSR format the local rows.
3545 
3546    Collective on MPI_Comm
3547 
3548    Input Parameters:
3549 +  comm - MPI communicator
3550 .  m - number of local rows (Cannot be PETSC_DECIDE)
3551 .  n - This value should be the same as the local size used in creating the
3552        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3553        calculated if N is given) For square matrices n is almost always m.
3554 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3555 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3556 .   i - row indices
3557 .   j - column indices
3558 -   a - matrix values
3559 
3560    Output Parameter:
3561 .   mat - the matrix
3562 
3563    Level: intermediate
3564 
3565    Notes:
3566        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3567      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3568      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3569 
3570        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3571 
3572        The format which is used for the sparse matrix input, is equivalent to a
3573     row-major ordering.. i.e for the following matrix, the input data expected is
3574     as shown:
3575 
3576         1 0 0
3577         2 0 3     P0
3578        -------
3579         4 5 6     P1
3580 
3581      Process0 [P0]: rows_owned=[0,1]
3582         i =  {0,1,3}  [size = nrow+1  = 2+1]
3583         j =  {0,0,2}  [size = nz = 6]
3584         v =  {1,2,3}  [size = nz = 6]
3585 
3586      Process1 [P1]: rows_owned=[2]
3587         i =  {0,3}    [size = nrow+1  = 1+1]
3588         j =  {0,1,2}  [size = nz = 6]
3589         v =  {4,5,6}  [size = nz = 6]
3590 
3591 .keywords: matrix, aij, compressed row, sparse, parallel
3592 
3593 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3594           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3595 @*/
3596 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3597 {
3598   PetscErrorCode ierr;
3599 
3600  PetscFunctionBegin;
3601   if (i[0]) {
3602     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3603   }
3604   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3605   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3606   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3607   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3608   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3609   PetscFunctionReturn(0);
3610 }
3611 
3612 #undef __FUNCT__
3613 #define __FUNCT__ "MatCreateMPIAIJ"
3614 /*@C
3615    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3616    (the default parallel PETSc format).  For good matrix assembly performance
3617    the user should preallocate the matrix storage by setting the parameters
3618    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3619    performance can be increased by more than a factor of 50.
3620 
3621    Collective on MPI_Comm
3622 
3623    Input Parameters:
3624 +  comm - MPI communicator
3625 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3626            This value should be the same as the local size used in creating the
3627            y vector for the matrix-vector product y = Ax.
3628 .  n - This value should be the same as the local size used in creating the
3629        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3630        calculated if N is given) For square matrices n is almost always m.
3631 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3632 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3633 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3634            (same value is used for all local rows)
3635 .  d_nnz - array containing the number of nonzeros in the various rows of the
3636            DIAGONAL portion of the local submatrix (possibly different for each row)
3637            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3638            The size of this array is equal to the number of local rows, i.e 'm'.
3639            You must leave room for the diagonal entry even if it is zero.
3640 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3641            submatrix (same value is used for all local rows).
3642 -  o_nnz - array containing the number of nonzeros in the various rows of the
3643            OFF-DIAGONAL portion of the local submatrix (possibly different for
3644            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3645            structure. The size of this array is equal to the number
3646            of local rows, i.e 'm'.
3647 
3648    Output Parameter:
3649 .  A - the matrix
3650 
3651    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3652    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3653    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3654 
3655    Notes:
3656    If the *_nnz parameter is given then the *_nz parameter is ignored
3657 
3658    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3659    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3660    storage requirements for this matrix.
3661 
3662    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3663    processor than it must be used on all processors that share the object for
3664    that argument.
3665 
3666    The user MUST specify either the local or global matrix dimensions
3667    (possibly both).
3668 
3669    The parallel matrix is partitioned across processors such that the
3670    first m0 rows belong to process 0, the next m1 rows belong to
3671    process 1, the next m2 rows belong to process 2 etc.. where
3672    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3673    values corresponding to [m x N] submatrix.
3674 
3675    The columns are logically partitioned with the n0 columns belonging
3676    to 0th partition, the next n1 columns belonging to the next
3677    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3678 
3679    The DIAGONAL portion of the local submatrix on any given processor
3680    is the submatrix corresponding to the rows and columns m,n
3681    corresponding to the given processor. i.e diagonal matrix on
3682    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3683    etc. The remaining portion of the local submatrix [m x (N-n)]
3684    constitute the OFF-DIAGONAL portion. The example below better
3685    illustrates this concept.
3686 
3687    For a square global matrix we define each processor's diagonal portion
3688    to be its local rows and the corresponding columns (a square submatrix);
3689    each processor's off-diagonal portion encompasses the remainder of the
3690    local matrix (a rectangular submatrix).
3691 
3692    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3693 
3694    When calling this routine with a single process communicator, a matrix of
3695    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3696    type of communicator, use the construction mechanism:
3697      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3698 
3699    By default, this format uses inodes (identical nodes) when possible.
3700    We search for consecutive rows with the same nonzero structure, thereby
3701    reusing matrix information to achieve increased efficiency.
3702 
3703    Options Database Keys:
3704 +  -mat_no_inode  - Do not use inodes
3705 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3706 -  -mat_aij_oneindex - Internally use indexing starting at 1
3707         rather than 0.  Note that when calling MatSetValues(),
3708         the user still MUST index entries starting at 0!
3709 
3710 
3711    Example usage:
3712 
3713    Consider the following 8x8 matrix with 34 non-zero values, that is
3714    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3715    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3716    as follows:
3717 
3718 .vb
3719             1  2  0  |  0  3  0  |  0  4
3720     Proc0   0  5  6  |  7  0  0  |  8  0
3721             9  0 10  | 11  0  0  | 12  0
3722     -------------------------------------
3723            13  0 14  | 15 16 17  |  0  0
3724     Proc1   0 18  0  | 19 20 21  |  0  0
3725             0  0  0  | 22 23  0  | 24  0
3726     -------------------------------------
3727     Proc2  25 26 27  |  0  0 28  | 29  0
3728            30  0  0  | 31 32 33  |  0 34
3729 .ve
3730 
3731    This can be represented as a collection of submatrices as:
3732 
3733 .vb
3734       A B C
3735       D E F
3736       G H I
3737 .ve
3738 
3739    Where the submatrices A,B,C are owned by proc0, D,E,F are
3740    owned by proc1, G,H,I are owned by proc2.
3741 
3742    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3743    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3744    The 'M','N' parameters are 8,8, and have the same values on all procs.
3745 
3746    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3747    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3748    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3749    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3750    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3751    matrix, ans [DF] as another SeqAIJ matrix.
3752 
3753    When d_nz, o_nz parameters are specified, d_nz storage elements are
3754    allocated for every row of the local diagonal submatrix, and o_nz
3755    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3756    One way to choose d_nz and o_nz is to use the max nonzerors per local
3757    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3758    In this case, the values of d_nz,o_nz are:
3759 .vb
3760      proc0 : dnz = 2, o_nz = 2
3761      proc1 : dnz = 3, o_nz = 2
3762      proc2 : dnz = 1, o_nz = 4
3763 .ve
3764    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3765    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3766    for proc3. i.e we are using 12+15+10=37 storage locations to store
3767    34 values.
3768 
3769    When d_nnz, o_nnz parameters are specified, the storage is specified
3770    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3771    In the above case the values for d_nnz,o_nnz are:
3772 .vb
3773      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3774      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3775      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3776 .ve
3777    Here the space allocated is sum of all the above values i.e 34, and
3778    hence pre-allocation is perfect.
3779 
3780    Level: intermediate
3781 
3782 .keywords: matrix, aij, compressed row, sparse, parallel
3783 
3784 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3785           MPIAIJ, MatCreateMPIAIJWithArrays()
3786 @*/
3787 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3788 {
3789   PetscErrorCode ierr;
3790   PetscMPIInt    size;
3791 
3792   PetscFunctionBegin;
3793   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3794   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3795   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3796   if (size > 1) {
3797     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3798     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3799   } else {
3800     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3801     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3802   }
3803   PetscFunctionReturn(0);
3804 }
3805 
3806 #undef __FUNCT__
3807 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3808 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3809 {
3810   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3811 
3812   PetscFunctionBegin;
3813   *Ad     = a->A;
3814   *Ao     = a->B;
3815   *colmap = a->garray;
3816   PetscFunctionReturn(0);
3817 }
3818 
3819 #undef __FUNCT__
3820 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3821 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3822 {
3823   PetscErrorCode ierr;
3824   PetscInt       i;
3825   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3826 
3827   PetscFunctionBegin;
3828   if (coloring->ctype == IS_COLORING_GLOBAL) {
3829     ISColoringValue *allcolors,*colors;
3830     ISColoring      ocoloring;
3831 
3832     /* set coloring for diagonal portion */
3833     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3834 
3835     /* set coloring for off-diagonal portion */
3836     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3837     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3838     for (i=0; i<a->B->cmap->n; i++) {
3839       colors[i] = allcolors[a->garray[i]];
3840     }
3841     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3842     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3843     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3844     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3845   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3846     ISColoringValue *colors;
3847     PetscInt        *larray;
3848     ISColoring      ocoloring;
3849 
3850     /* set coloring for diagonal portion */
3851     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3852     for (i=0; i<a->A->cmap->n; i++) {
3853       larray[i] = i + A->cmap->rstart;
3854     }
3855     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3856     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3857     for (i=0; i<a->A->cmap->n; i++) {
3858       colors[i] = coloring->colors[larray[i]];
3859     }
3860     ierr = PetscFree(larray);CHKERRQ(ierr);
3861     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3862     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3863     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3864 
3865     /* set coloring for off-diagonal portion */
3866     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3867     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3868     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3869     for (i=0; i<a->B->cmap->n; i++) {
3870       colors[i] = coloring->colors[larray[i]];
3871     }
3872     ierr = PetscFree(larray);CHKERRQ(ierr);
3873     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3874     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3875     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3876   } else {
3877     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3878   }
3879 
3880   PetscFunctionReturn(0);
3881 }
3882 
3883 #if defined(PETSC_HAVE_ADIC)
3884 #undef __FUNCT__
3885 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3886 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3887 {
3888   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3889   PetscErrorCode ierr;
3890 
3891   PetscFunctionBegin;
3892   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3893   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3894   PetscFunctionReturn(0);
3895 }
3896 #endif
3897 
3898 #undef __FUNCT__
3899 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3900 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3901 {
3902   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3903   PetscErrorCode ierr;
3904 
3905   PetscFunctionBegin;
3906   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3907   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3908   PetscFunctionReturn(0);
3909 }
3910 
3911 #undef __FUNCT__
3912 #define __FUNCT__ "MatMerge"
3913 /*@
3914       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3915                  matrices from each processor
3916 
3917     Collective on MPI_Comm
3918 
3919    Input Parameters:
3920 +    comm - the communicators the parallel matrix will live on
3921 .    inmat - the input sequential matrices
3922 .    n - number of local columns (or PETSC_DECIDE)
3923 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3924 
3925    Output Parameter:
3926 .    outmat - the parallel matrix generated
3927 
3928     Level: advanced
3929 
3930    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3931 
3932 @*/
3933 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3934 {
3935   PetscErrorCode ierr;
3936   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3937   PetscInt       *indx;
3938   PetscScalar    *values;
3939 
3940   PetscFunctionBegin;
3941   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3942   if (scall == MAT_INITIAL_MATRIX){
3943     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3944     if (n == PETSC_DECIDE){
3945       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3946     }
3947     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3948     rstart -= m;
3949 
3950     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3951     for (i=0;i<m;i++) {
3952       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3953       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3954       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3955     }
3956     /* This routine will ONLY return MPIAIJ type matrix */
3957     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3958     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3959     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3960     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3961     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3962 
3963   } else if (scall == MAT_REUSE_MATRIX){
3964     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3965   } else {
3966     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3967   }
3968 
3969   for (i=0;i<m;i++) {
3970     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3971     Ii    = i + rstart;
3972     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3973     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3974   }
3975   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3976   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3977   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3978 
3979   PetscFunctionReturn(0);
3980 }
3981 
3982 #undef __FUNCT__
3983 #define __FUNCT__ "MatFileSplit"
3984 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3985 {
3986   PetscErrorCode    ierr;
3987   PetscMPIInt       rank;
3988   PetscInt          m,N,i,rstart,nnz;
3989   size_t            len;
3990   const PetscInt    *indx;
3991   PetscViewer       out;
3992   char              *name;
3993   Mat               B;
3994   const PetscScalar *values;
3995 
3996   PetscFunctionBegin;
3997   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3998   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3999   /* Should this be the type of the diagonal block of A? */
4000   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4001   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4002   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4003   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4004   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4005   for (i=0;i<m;i++) {
4006     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4007     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4008     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4009   }
4010   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4011   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4012 
4013   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4014   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4015   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4016   sprintf(name,"%s.%d",outfile,rank);
4017   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4018   ierr = PetscFree(name);
4019   ierr = MatView(B,out);CHKERRQ(ierr);
4020   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4021   ierr = MatDestroy(B);CHKERRQ(ierr);
4022   PetscFunctionReturn(0);
4023 }
4024 
4025 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4026 #undef __FUNCT__
4027 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4028 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4029 {
4030   PetscErrorCode       ierr;
4031   Mat_Merge_SeqsToMPI  *merge;
4032   PetscContainer       container;
4033 
4034   PetscFunctionBegin;
4035   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4036   if (container) {
4037     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4038     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4039     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4040     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4041     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4042     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4043     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4044     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4045     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4046     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4047     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4048     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4049     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4050     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4051 
4052     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4053     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4054   }
4055   ierr = PetscFree(merge);CHKERRQ(ierr);
4056 
4057   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4058   PetscFunctionReturn(0);
4059 }
4060 
4061 #include "../src/mat/utils/freespace.h"
4062 #include "petscbt.h"
4063 
4064 #undef __FUNCT__
4065 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4066 /*@C
4067       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4068                  matrices from each processor
4069 
4070     Collective on MPI_Comm
4071 
4072    Input Parameters:
4073 +    comm - the communicators the parallel matrix will live on
4074 .    seqmat - the input sequential matrices
4075 .    m - number of local rows (or PETSC_DECIDE)
4076 .    n - number of local columns (or PETSC_DECIDE)
4077 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4078 
4079    Output Parameter:
4080 .    mpimat - the parallel matrix generated
4081 
4082     Level: advanced
4083 
4084    Notes:
4085      The dimensions of the sequential matrix in each processor MUST be the same.
4086      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4087      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4088 @*/
4089 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4090 {
4091   PetscErrorCode       ierr;
4092   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4093   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4094   PetscMPIInt          size,rank,taga,*len_s;
4095   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4096   PetscInt             proc,m;
4097   PetscInt             **buf_ri,**buf_rj;
4098   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4099   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4100   MPI_Request          *s_waits,*r_waits;
4101   MPI_Status           *status;
4102   MatScalar            *aa=a->a;
4103   MatScalar            **abuf_r,*ba_i;
4104   Mat_Merge_SeqsToMPI  *merge;
4105   PetscContainer       container;
4106 
4107   PetscFunctionBegin;
4108   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4109 
4110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4111   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4112 
4113   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4114   if (container) {
4115     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4116   }
4117   bi     = merge->bi;
4118   bj     = merge->bj;
4119   buf_ri = merge->buf_ri;
4120   buf_rj = merge->buf_rj;
4121 
4122   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4123   owners = merge->rowmap->range;
4124   len_s  = merge->len_s;
4125 
4126   /* send and recv matrix values */
4127   /*-----------------------------*/
4128   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4129   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4130 
4131   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4132   for (proc=0,k=0; proc<size; proc++){
4133     if (!len_s[proc]) continue;
4134     i = owners[proc];
4135     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4136     k++;
4137   }
4138 
4139   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4140   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4141   ierr = PetscFree(status);CHKERRQ(ierr);
4142 
4143   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4144   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4145 
4146   /* insert mat values of mpimat */
4147   /*----------------------------*/
4148   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4149   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4150 
4151   for (k=0; k<merge->nrecv; k++){
4152     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4153     nrows = *(buf_ri_k[k]);
4154     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4155     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4156   }
4157 
4158   /* set values of ba */
4159   m = merge->rowmap->n;
4160   for (i=0; i<m; i++) {
4161     arow = owners[rank] + i;
4162     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4163     bnzi = bi[i+1] - bi[i];
4164     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4165 
4166     /* add local non-zero vals of this proc's seqmat into ba */
4167     anzi = ai[arow+1] - ai[arow];
4168     aj   = a->j + ai[arow];
4169     aa   = a->a + ai[arow];
4170     nextaj = 0;
4171     for (j=0; nextaj<anzi; j++){
4172       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4173         ba_i[j] += aa[nextaj++];
4174       }
4175     }
4176 
4177     /* add received vals into ba */
4178     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4179       /* i-th row */
4180       if (i == *nextrow[k]) {
4181         anzi = *(nextai[k]+1) - *nextai[k];
4182         aj   = buf_rj[k] + *(nextai[k]);
4183         aa   = abuf_r[k] + *(nextai[k]);
4184         nextaj = 0;
4185         for (j=0; nextaj<anzi; j++){
4186           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4187             ba_i[j] += aa[nextaj++];
4188           }
4189         }
4190         nextrow[k]++; nextai[k]++;
4191       }
4192     }
4193     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4194   }
4195   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4196   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4197 
4198   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4199   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4200   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4201   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4202   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4203   PetscFunctionReturn(0);
4204 }
4205 
4206 #undef __FUNCT__
4207 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4208 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4209 {
4210   PetscErrorCode       ierr;
4211   Mat                  B_mpi;
4212   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4213   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4214   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4215   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4216   PetscInt             len,proc,*dnz,*onz;
4217   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4218   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4219   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4220   MPI_Status           *status;
4221   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4222   PetscBT              lnkbt;
4223   Mat_Merge_SeqsToMPI  *merge;
4224   PetscContainer       container;
4225 
4226   PetscFunctionBegin;
4227   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4228 
4229   /* make sure it is a PETSc comm */
4230   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4231   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4232   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4233 
4234   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4235   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4236 
4237   /* determine row ownership */
4238   /*---------------------------------------------------------*/
4239   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4240   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4241   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4242   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4243   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4244   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4245   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4246 
4247   m      = merge->rowmap->n;
4248   M      = merge->rowmap->N;
4249   owners = merge->rowmap->range;
4250 
4251   /* determine the number of messages to send, their lengths */
4252   /*---------------------------------------------------------*/
4253   len_s  = merge->len_s;
4254 
4255   len = 0;  /* length of buf_si[] */
4256   merge->nsend = 0;
4257   for (proc=0; proc<size; proc++){
4258     len_si[proc] = 0;
4259     if (proc == rank){
4260       len_s[proc] = 0;
4261     } else {
4262       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4263       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4264     }
4265     if (len_s[proc]) {
4266       merge->nsend++;
4267       nrows = 0;
4268       for (i=owners[proc]; i<owners[proc+1]; i++){
4269         if (ai[i+1] > ai[i]) nrows++;
4270       }
4271       len_si[proc] = 2*(nrows+1);
4272       len += len_si[proc];
4273     }
4274   }
4275 
4276   /* determine the number and length of messages to receive for ij-structure */
4277   /*-------------------------------------------------------------------------*/
4278   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4279   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4280 
4281   /* post the Irecv of j-structure */
4282   /*-------------------------------*/
4283   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4284   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4285 
4286   /* post the Isend of j-structure */
4287   /*--------------------------------*/
4288   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4289 
4290   for (proc=0, k=0; proc<size; proc++){
4291     if (!len_s[proc]) continue;
4292     i = owners[proc];
4293     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4294     k++;
4295   }
4296 
4297   /* receives and sends of j-structure are complete */
4298   /*------------------------------------------------*/
4299   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4300   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4301 
4302   /* send and recv i-structure */
4303   /*---------------------------*/
4304   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4305   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4306 
4307   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4308   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4309   for (proc=0,k=0; proc<size; proc++){
4310     if (!len_s[proc]) continue;
4311     /* form outgoing message for i-structure:
4312          buf_si[0]:                 nrows to be sent
4313                [1:nrows]:           row index (global)
4314                [nrows+1:2*nrows+1]: i-structure index
4315     */
4316     /*-------------------------------------------*/
4317     nrows = len_si[proc]/2 - 1;
4318     buf_si_i    = buf_si + nrows+1;
4319     buf_si[0]   = nrows;
4320     buf_si_i[0] = 0;
4321     nrows = 0;
4322     for (i=owners[proc]; i<owners[proc+1]; i++){
4323       anzi = ai[i+1] - ai[i];
4324       if (anzi) {
4325         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4326         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4327         nrows++;
4328       }
4329     }
4330     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4331     k++;
4332     buf_si += len_si[proc];
4333   }
4334 
4335   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4336   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4337 
4338   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4339   for (i=0; i<merge->nrecv; i++){
4340     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4341   }
4342 
4343   ierr = PetscFree(len_si);CHKERRQ(ierr);
4344   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4345   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4346   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4347   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4348   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4349   ierr = PetscFree(status);CHKERRQ(ierr);
4350 
4351   /* compute a local seq matrix in each processor */
4352   /*----------------------------------------------*/
4353   /* allocate bi array and free space for accumulating nonzero column info */
4354   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4355   bi[0] = 0;
4356 
4357   /* create and initialize a linked list */
4358   nlnk = N+1;
4359   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4360 
4361   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4362   len = 0;
4363   len  = ai[owners[rank+1]] - ai[owners[rank]];
4364   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4365   current_space = free_space;
4366 
4367   /* determine symbolic info for each local row */
4368   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4369 
4370   for (k=0; k<merge->nrecv; k++){
4371     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4372     nrows = *buf_ri_k[k];
4373     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4374     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4375   }
4376 
4377   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4378   len = 0;
4379   for (i=0;i<m;i++) {
4380     bnzi   = 0;
4381     /* add local non-zero cols of this proc's seqmat into lnk */
4382     arow   = owners[rank] + i;
4383     anzi   = ai[arow+1] - ai[arow];
4384     aj     = a->j + ai[arow];
4385     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4386     bnzi += nlnk;
4387     /* add received col data into lnk */
4388     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4389       if (i == *nextrow[k]) { /* i-th row */
4390         anzi = *(nextai[k]+1) - *nextai[k];
4391         aj   = buf_rj[k] + *nextai[k];
4392         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4393         bnzi += nlnk;
4394         nextrow[k]++; nextai[k]++;
4395       }
4396     }
4397     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4398 
4399     /* if free space is not available, make more free space */
4400     if (current_space->local_remaining<bnzi) {
4401       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4402       nspacedouble++;
4403     }
4404     /* copy data into free space, then initialize lnk */
4405     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4406     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4407 
4408     current_space->array           += bnzi;
4409     current_space->local_used      += bnzi;
4410     current_space->local_remaining -= bnzi;
4411 
4412     bi[i+1] = bi[i] + bnzi;
4413   }
4414 
4415   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4416 
4417   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4418   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4419   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4420 
4421   /* create symbolic parallel matrix B_mpi */
4422   /*---------------------------------------*/
4423   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4424   if (n==PETSC_DECIDE) {
4425     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4426   } else {
4427     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4428   }
4429   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4430   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4431   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4432 
4433   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4434   B_mpi->assembled     = PETSC_FALSE;
4435   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4436   merge->bi            = bi;
4437   merge->bj            = bj;
4438   merge->buf_ri        = buf_ri;
4439   merge->buf_rj        = buf_rj;
4440   merge->coi           = PETSC_NULL;
4441   merge->coj           = PETSC_NULL;
4442   merge->owners_co     = PETSC_NULL;
4443 
4444   /* attach the supporting struct to B_mpi for reuse */
4445   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4446   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4447   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4448   *mpimat = B_mpi;
4449 
4450   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4451   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4452   PetscFunctionReturn(0);
4453 }
4454 
4455 #undef __FUNCT__
4456 #define __FUNCT__ "MatMerge_SeqsToMPI"
4457 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4458 {
4459   PetscErrorCode   ierr;
4460 
4461   PetscFunctionBegin;
4462   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4463   if (scall == MAT_INITIAL_MATRIX){
4464     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4465   }
4466   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4467   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4468   PetscFunctionReturn(0);
4469 }
4470 
4471 #undef __FUNCT__
4472 #define __FUNCT__ "MatGetLocalMat"
4473 /*@
4474      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4475 
4476     Not Collective
4477 
4478    Input Parameters:
4479 +    A - the matrix
4480 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4481 
4482    Output Parameter:
4483 .    A_loc - the local sequential matrix generated
4484 
4485     Level: developer
4486 
4487 @*/
4488 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4489 {
4490   PetscErrorCode  ierr;
4491   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4492   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4493   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4494   MatScalar       *aa=a->a,*ba=b->a,*cam;
4495   PetscScalar     *ca;
4496   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4497   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4498 
4499   PetscFunctionBegin;
4500   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4501   if (scall == MAT_INITIAL_MATRIX){
4502     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4503     ci[0] = 0;
4504     for (i=0; i<am; i++){
4505       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4506     }
4507     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4508     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4509     k = 0;
4510     for (i=0; i<am; i++) {
4511       ncols_o = bi[i+1] - bi[i];
4512       ncols_d = ai[i+1] - ai[i];
4513       /* off-diagonal portion of A */
4514       for (jo=0; jo<ncols_o; jo++) {
4515         col = cmap[*bj];
4516         if (col >= cstart) break;
4517         cj[k]   = col; bj++;
4518         ca[k++] = *ba++;
4519       }
4520       /* diagonal portion of A */
4521       for (j=0; j<ncols_d; j++) {
4522         cj[k]   = cstart + *aj++;
4523         ca[k++] = *aa++;
4524       }
4525       /* off-diagonal portion of A */
4526       for (j=jo; j<ncols_o; j++) {
4527         cj[k]   = cmap[*bj++];
4528         ca[k++] = *ba++;
4529       }
4530     }
4531     /* put together the new matrix */
4532     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4533     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4534     /* Since these are PETSc arrays, change flags to free them as necessary. */
4535     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4536     mat->free_a  = PETSC_TRUE;
4537     mat->free_ij = PETSC_TRUE;
4538     mat->nonew   = 0;
4539   } else if (scall == MAT_REUSE_MATRIX){
4540     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4541     ci = mat->i; cj = mat->j; cam = mat->a;
4542     for (i=0; i<am; i++) {
4543       /* off-diagonal portion of A */
4544       ncols_o = bi[i+1] - bi[i];
4545       for (jo=0; jo<ncols_o; jo++) {
4546         col = cmap[*bj];
4547         if (col >= cstart) break;
4548         *cam++ = *ba++; bj++;
4549       }
4550       /* diagonal portion of A */
4551       ncols_d = ai[i+1] - ai[i];
4552       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4553       /* off-diagonal portion of A */
4554       for (j=jo; j<ncols_o; j++) {
4555         *cam++ = *ba++; bj++;
4556       }
4557     }
4558   } else {
4559     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4560   }
4561 
4562   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4563   PetscFunctionReturn(0);
4564 }
4565 
4566 #undef __FUNCT__
4567 #define __FUNCT__ "MatGetLocalMatCondensed"
4568 /*@C
4569      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4570 
4571     Not Collective
4572 
4573    Input Parameters:
4574 +    A - the matrix
4575 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4576 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4577 
4578    Output Parameter:
4579 .    A_loc - the local sequential matrix generated
4580 
4581     Level: developer
4582 
4583 @*/
4584 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4585 {
4586   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4587   PetscErrorCode    ierr;
4588   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4589   IS                isrowa,iscola;
4590   Mat               *aloc;
4591 
4592   PetscFunctionBegin;
4593   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4594   if (!row){
4595     start = A->rmap->rstart; end = A->rmap->rend;
4596     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4597   } else {
4598     isrowa = *row;
4599   }
4600   if (!col){
4601     start = A->cmap->rstart;
4602     cmap  = a->garray;
4603     nzA   = a->A->cmap->n;
4604     nzB   = a->B->cmap->n;
4605     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4606     ncols = 0;
4607     for (i=0; i<nzB; i++) {
4608       if (cmap[i] < start) idx[ncols++] = cmap[i];
4609       else break;
4610     }
4611     imark = i;
4612     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4613     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4614     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4615     ierr = PetscFree(idx);CHKERRQ(ierr);
4616   } else {
4617     iscola = *col;
4618   }
4619   if (scall != MAT_INITIAL_MATRIX){
4620     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4621     aloc[0] = *A_loc;
4622   }
4623   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4624   *A_loc = aloc[0];
4625   ierr = PetscFree(aloc);CHKERRQ(ierr);
4626   if (!row){
4627     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4628   }
4629   if (!col){
4630     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4631   }
4632   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4633   PetscFunctionReturn(0);
4634 }
4635 
4636 #undef __FUNCT__
4637 #define __FUNCT__ "MatGetBrowsOfAcols"
4638 /*@C
4639     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4640 
4641     Collective on Mat
4642 
4643    Input Parameters:
4644 +    A,B - the matrices in mpiaij format
4645 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4646 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4647 
4648    Output Parameter:
4649 +    rowb, colb - index sets of rows and columns of B to extract
4650 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4651 -    B_seq - the sequential matrix generated
4652 
4653     Level: developer
4654 
4655 @*/
4656 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4657 {
4658   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4659   PetscErrorCode    ierr;
4660   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4661   IS                isrowb,iscolb;
4662   Mat               *bseq;
4663 
4664   PetscFunctionBegin;
4665   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4666     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4667   }
4668   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4669 
4670   if (scall == MAT_INITIAL_MATRIX){
4671     start = A->cmap->rstart;
4672     cmap  = a->garray;
4673     nzA   = a->A->cmap->n;
4674     nzB   = a->B->cmap->n;
4675     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4676     ncols = 0;
4677     for (i=0; i<nzB; i++) {  /* row < local row index */
4678       if (cmap[i] < start) idx[ncols++] = cmap[i];
4679       else break;
4680     }
4681     imark = i;
4682     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4683     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4684     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4685     ierr = PetscFree(idx);CHKERRQ(ierr);
4686     *brstart = imark;
4687     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4688   } else {
4689     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4690     isrowb = *rowb; iscolb = *colb;
4691     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4692     bseq[0] = *B_seq;
4693   }
4694   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4695   *B_seq = bseq[0];
4696   ierr = PetscFree(bseq);CHKERRQ(ierr);
4697   if (!rowb){
4698     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4699   } else {
4700     *rowb = isrowb;
4701   }
4702   if (!colb){
4703     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4704   } else {
4705     *colb = iscolb;
4706   }
4707   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4708   PetscFunctionReturn(0);
4709 }
4710 
4711 #undef __FUNCT__
4712 #define __FUNCT__ "MatGetBrowsOfAoCols"
4713 /*@C
4714     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4715     of the OFF-DIAGONAL portion of local A
4716 
4717     Collective on Mat
4718 
4719    Input Parameters:
4720 +    A,B - the matrices in mpiaij format
4721 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4722 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4723 .    startsj_r - similar to startsj for receives
4724 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4725 
4726    Output Parameter:
4727 +    B_oth - the sequential matrix generated
4728 
4729     Level: developer
4730 
4731 @*/
4732 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4733 {
4734   VecScatter_MPI_General *gen_to,*gen_from;
4735   PetscErrorCode         ierr;
4736   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4737   Mat_SeqAIJ             *b_oth;
4738   VecScatter             ctx=a->Mvctx;
4739   MPI_Comm               comm=((PetscObject)ctx)->comm;
4740   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4741   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4742   PetscScalar            *rvalues,*svalues;
4743   MatScalar              *b_otha,*bufa,*bufA;
4744   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4745   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4746   MPI_Status             *sstatus,rstatus;
4747   PetscMPIInt            jj;
4748   PetscInt               *cols,sbs,rbs;
4749   PetscScalar            *vals;
4750 
4751   PetscFunctionBegin;
4752   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4753     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4754   }
4755   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4756   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4757 
4758   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4759   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4760   rvalues  = gen_from->values; /* holds the length of receiving row */
4761   svalues  = gen_to->values;   /* holds the length of sending row */
4762   nrecvs   = gen_from->n;
4763   nsends   = gen_to->n;
4764 
4765   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4766   srow     = gen_to->indices;   /* local row index to be sent */
4767   sstarts  = gen_to->starts;
4768   sprocs   = gen_to->procs;
4769   sstatus  = gen_to->sstatus;
4770   sbs      = gen_to->bs;
4771   rstarts  = gen_from->starts;
4772   rprocs   = gen_from->procs;
4773   rbs      = gen_from->bs;
4774 
4775   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4776   if (scall == MAT_INITIAL_MATRIX){
4777     /* i-array */
4778     /*---------*/
4779     /*  post receives */
4780     for (i=0; i<nrecvs; i++){
4781       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4782       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4783       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4784     }
4785 
4786     /* pack the outgoing message */
4787     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4788     sstartsj[0] = 0;  rstartsj[0] = 0;
4789     len = 0; /* total length of j or a array to be sent */
4790     k = 0;
4791     for (i=0; i<nsends; i++){
4792       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4793       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4794       for (j=0; j<nrows; j++) {
4795         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4796         for (l=0; l<sbs; l++){
4797           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4798           rowlen[j*sbs+l] = ncols;
4799           len += ncols;
4800           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4801         }
4802         k++;
4803       }
4804       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4805       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4806     }
4807     /* recvs and sends of i-array are completed */
4808     i = nrecvs;
4809     while (i--) {
4810       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4811     }
4812     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4813 
4814     /* allocate buffers for sending j and a arrays */
4815     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4816     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4817 
4818     /* create i-array of B_oth */
4819     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4820     b_othi[0] = 0;
4821     len = 0; /* total length of j or a array to be received */
4822     k = 0;
4823     for (i=0; i<nrecvs; i++){
4824       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4825       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4826       for (j=0; j<nrows; j++) {
4827         b_othi[k+1] = b_othi[k] + rowlen[j];
4828         len += rowlen[j]; k++;
4829       }
4830       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4831     }
4832 
4833     /* allocate space for j and a arrrays of B_oth */
4834     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4835     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4836 
4837     /* j-array */
4838     /*---------*/
4839     /*  post receives of j-array */
4840     for (i=0; i<nrecvs; i++){
4841       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4842       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4843     }
4844 
4845     /* pack the outgoing message j-array */
4846     k = 0;
4847     for (i=0; i<nsends; i++){
4848       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4849       bufJ = bufj+sstartsj[i];
4850       for (j=0; j<nrows; j++) {
4851         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4852         for (ll=0; ll<sbs; ll++){
4853           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4854           for (l=0; l<ncols; l++){
4855             *bufJ++ = cols[l];
4856           }
4857           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4858         }
4859       }
4860       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4861     }
4862 
4863     /* recvs and sends of j-array are completed */
4864     i = nrecvs;
4865     while (i--) {
4866       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4867     }
4868     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4869   } else if (scall == MAT_REUSE_MATRIX){
4870     sstartsj = *startsj;
4871     rstartsj = *startsj_r;
4872     bufa     = *bufa_ptr;
4873     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4874     b_otha   = b_oth->a;
4875   } else {
4876     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4877   }
4878 
4879   /* a-array */
4880   /*---------*/
4881   /*  post receives of a-array */
4882   for (i=0; i<nrecvs; i++){
4883     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4884     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4885   }
4886 
4887   /* pack the outgoing message a-array */
4888   k = 0;
4889   for (i=0; i<nsends; i++){
4890     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4891     bufA = bufa+sstartsj[i];
4892     for (j=0; j<nrows; j++) {
4893       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4894       for (ll=0; ll<sbs; ll++){
4895         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4896         for (l=0; l<ncols; l++){
4897           *bufA++ = vals[l];
4898         }
4899         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4900       }
4901     }
4902     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4903   }
4904   /* recvs and sends of a-array are completed */
4905   i = nrecvs;
4906   while (i--) {
4907     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4908   }
4909   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4910   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4911 
4912   if (scall == MAT_INITIAL_MATRIX){
4913     /* put together the new matrix */
4914     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4915 
4916     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4917     /* Since these are PETSc arrays, change flags to free them as necessary. */
4918     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4919     b_oth->free_a  = PETSC_TRUE;
4920     b_oth->free_ij = PETSC_TRUE;
4921     b_oth->nonew   = 0;
4922 
4923     ierr = PetscFree(bufj);CHKERRQ(ierr);
4924     if (!startsj || !bufa_ptr){
4925       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4926       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4927     } else {
4928       *startsj   = sstartsj;
4929       *startsj_r = rstartsj;
4930       *bufa_ptr  = bufa;
4931     }
4932   }
4933   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4934   PetscFunctionReturn(0);
4935 }
4936 
4937 #undef __FUNCT__
4938 #define __FUNCT__ "MatGetCommunicationStructs"
4939 /*@C
4940   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4941 
4942   Not Collective
4943 
4944   Input Parameters:
4945 . A - The matrix in mpiaij format
4946 
4947   Output Parameter:
4948 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4949 . colmap - A map from global column index to local index into lvec
4950 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4951 
4952   Level: developer
4953 
4954 @*/
4955 #if defined (PETSC_USE_CTABLE)
4956 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4957 #else
4958 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4959 #endif
4960 {
4961   Mat_MPIAIJ *a;
4962 
4963   PetscFunctionBegin;
4964   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4965   PetscValidPointer(lvec, 2);
4966   PetscValidPointer(colmap, 3);
4967   PetscValidPointer(multScatter, 4);
4968   a = (Mat_MPIAIJ *) A->data;
4969   if (lvec) *lvec = a->lvec;
4970   if (colmap) *colmap = a->colmap;
4971   if (multScatter) *multScatter = a->Mvctx;
4972   PetscFunctionReturn(0);
4973 }
4974 
4975 EXTERN_C_BEGIN
4976 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
4977 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
4978 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4979 EXTERN_C_END
4980 
4981 #undef __FUNCT__
4982 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4983 /*
4984     Computes (B'*A')' since computing B*A directly is untenable
4985 
4986                n                       p                          p
4987         (              )       (              )         (                  )
4988       m (      A       )  *  n (       B      )   =   m (         C        )
4989         (              )       (              )         (                  )
4990 
4991 */
4992 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4993 {
4994   PetscErrorCode     ierr;
4995   Mat                At,Bt,Ct;
4996 
4997   PetscFunctionBegin;
4998   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4999   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5000   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5001   ierr = MatDestroy(At);CHKERRQ(ierr);
5002   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5003   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5004   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5005   PetscFunctionReturn(0);
5006 }
5007 
5008 #undef __FUNCT__
5009 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5010 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5011 {
5012   PetscErrorCode ierr;
5013   PetscInt       m=A->rmap->n,n=B->cmap->n;
5014   Mat            Cmat;
5015 
5016   PetscFunctionBegin;
5017   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5018   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5019   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5020   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5021   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5022   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5023   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5024   *C   = Cmat;
5025   PetscFunctionReturn(0);
5026 }
5027 
5028 /* ----------------------------------------------------------------*/
5029 #undef __FUNCT__
5030 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5031 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5032 {
5033   PetscErrorCode ierr;
5034 
5035   PetscFunctionBegin;
5036   if (scall == MAT_INITIAL_MATRIX){
5037     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5038   }
5039   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5040   PetscFunctionReturn(0);
5041 }
5042 
5043 EXTERN_C_BEGIN
5044 #if defined(PETSC_HAVE_MUMPS)
5045 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5046 #endif
5047 #if defined(PETSC_HAVE_PASTIX)
5048 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5049 #endif
5050 #if defined(PETSC_HAVE_SUPERLU_DIST)
5051 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5052 #endif
5053 #if defined(PETSC_HAVE_SPOOLES)
5054 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5055 #endif
5056 EXTERN_C_END
5057 
5058 /*MC
5059    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5060 
5061    Options Database Keys:
5062 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5063 
5064   Level: beginner
5065 
5066 .seealso: MatCreateMPIAIJ()
5067 M*/
5068 
5069 EXTERN_C_BEGIN
5070 #undef __FUNCT__
5071 #define __FUNCT__ "MatCreate_MPIAIJ"
5072 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5073 {
5074   Mat_MPIAIJ     *b;
5075   PetscErrorCode ierr;
5076   PetscMPIInt    size;
5077 
5078   PetscFunctionBegin;
5079   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5080 
5081   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5082   B->data         = (void*)b;
5083   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5084   B->rmap->bs     = 1;
5085   B->assembled    = PETSC_FALSE;
5086   B->mapping      = 0;
5087 
5088   B->insertmode   = NOT_SET_VALUES;
5089   b->size         = size;
5090   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5091 
5092   /* build cache for off array entries formed */
5093   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5094   b->donotstash  = PETSC_FALSE;
5095   b->colmap      = 0;
5096   b->garray      = 0;
5097   b->roworiented = PETSC_TRUE;
5098 
5099   /* stuff used for matrix vector multiply */
5100   b->lvec      = PETSC_NULL;
5101   b->Mvctx     = PETSC_NULL;
5102 
5103   /* stuff for MatGetRow() */
5104   b->rowindices   = 0;
5105   b->rowvalues    = 0;
5106   b->getrowactive = PETSC_FALSE;
5107 
5108 #if defined(PETSC_HAVE_SPOOLES)
5109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5110                                      "MatGetFactor_mpiaij_spooles",
5111                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5112 #endif
5113 #if defined(PETSC_HAVE_MUMPS)
5114   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5115                                      "MatGetFactor_aij_mumps",
5116                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5117 #endif
5118 #if defined(PETSC_HAVE_PASTIX)
5119   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5120 					   "MatGetFactor_mpiaij_pastix",
5121 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5122 #endif
5123 #if defined(PETSC_HAVE_SUPERLU_DIST)
5124   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5125                                      "MatGetFactor_mpiaij_superlu_dist",
5126                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5127 #endif
5128   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5129                                      "MatStoreValues_MPIAIJ",
5130                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5131   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5132                                      "MatRetrieveValues_MPIAIJ",
5133                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5134   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5135 				     "MatGetDiagonalBlock_MPIAIJ",
5136                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5137   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5138 				     "MatIsTranspose_MPIAIJ",
5139 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5140   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5141 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5142 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5143   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5144 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5145 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5146   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5147 				     "MatDiagonalScaleLocal_MPIAIJ",
5148 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5149   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5150                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5151                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5152   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5153                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5154                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5155   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5156                                      "MatConvert_MPIAIJ_MPISBAIJ",
5157                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5158   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5159                                      "MatMatMult_MPIDense_MPIAIJ",
5160                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5161   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5162                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5163                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5164   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5165                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5166                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5167   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5168   PetscFunctionReturn(0);
5169 }
5170 EXTERN_C_END
5171 
5172 #undef __FUNCT__
5173 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5174 /*@
5175      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5176          and "off-diagonal" part of the matrix in CSR format.
5177 
5178    Collective on MPI_Comm
5179 
5180    Input Parameters:
5181 +  comm - MPI communicator
5182 .  m - number of local rows (Cannot be PETSC_DECIDE)
5183 .  n - This value should be the same as the local size used in creating the
5184        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5185        calculated if N is given) For square matrices n is almost always m.
5186 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5187 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5188 .   i - row indices for "diagonal" portion of matrix
5189 .   j - column indices
5190 .   a - matrix values
5191 .   oi - row indices for "off-diagonal" portion of matrix
5192 .   oj - column indices
5193 -   oa - matrix values
5194 
5195    Output Parameter:
5196 .   mat - the matrix
5197 
5198    Level: advanced
5199 
5200    Notes:
5201        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5202 
5203        The i and j indices are 0 based
5204 
5205        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5206 
5207        This sets local rows and cannot be used to set off-processor values.
5208 
5209        You cannot later use MatSetValues() to change values in this matrix.
5210 
5211 .keywords: matrix, aij, compressed row, sparse, parallel
5212 
5213 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5214           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5215 @*/
5216 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5217 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5218 {
5219   PetscErrorCode ierr;
5220   Mat_MPIAIJ     *maij;
5221 
5222  PetscFunctionBegin;
5223   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5224   if (i[0]) {
5225     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5226   }
5227   if (oi[0]) {
5228     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5229   }
5230   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5231   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5232   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5233   maij = (Mat_MPIAIJ*) (*mat)->data;
5234   maij->donotstash     = PETSC_TRUE;
5235   (*mat)->preallocated = PETSC_TRUE;
5236 
5237   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5238   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5239   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5240   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5241 
5242   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5243   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5244 
5245   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5246   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5247   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5248   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5249 
5250   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5251   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5252   PetscFunctionReturn(0);
5253 }
5254 
5255 /*
5256     Special version for direct calls from Fortran
5257 */
5258 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5259 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5260 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5261 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5262 #endif
5263 
5264 /* Change these macros so can be used in void function */
5265 #undef CHKERRQ
5266 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5267 #undef SETERRQ2
5268 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5269 #undef SETERRQ
5270 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5271 
5272 EXTERN_C_BEGIN
5273 #undef __FUNCT__
5274 #define __FUNCT__ "matsetvaluesmpiaij_"
5275 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5276 {
5277   Mat             mat = *mmat;
5278   PetscInt        m = *mm, n = *mn;
5279   InsertMode      addv = *maddv;
5280   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5281   PetscScalar     value;
5282   PetscErrorCode  ierr;
5283 
5284   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5285   if (mat->insertmode == NOT_SET_VALUES) {
5286     mat->insertmode = addv;
5287   }
5288 #if defined(PETSC_USE_DEBUG)
5289   else if (mat->insertmode != addv) {
5290     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5291   }
5292 #endif
5293   {
5294   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5295   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5296   PetscTruth      roworiented = aij->roworiented;
5297 
5298   /* Some Variables required in the macro */
5299   Mat             A = aij->A;
5300   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5301   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5302   MatScalar       *aa = a->a;
5303   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5304   Mat             B = aij->B;
5305   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5306   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5307   MatScalar       *ba = b->a;
5308 
5309   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5310   PetscInt        nonew = a->nonew;
5311   MatScalar       *ap1,*ap2;
5312 
5313   PetscFunctionBegin;
5314   for (i=0; i<m; i++) {
5315     if (im[i] < 0) continue;
5316 #if defined(PETSC_USE_DEBUG)
5317     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5318 #endif
5319     if (im[i] >= rstart && im[i] < rend) {
5320       row      = im[i] - rstart;
5321       lastcol1 = -1;
5322       rp1      = aj + ai[row];
5323       ap1      = aa + ai[row];
5324       rmax1    = aimax[row];
5325       nrow1    = ailen[row];
5326       low1     = 0;
5327       high1    = nrow1;
5328       lastcol2 = -1;
5329       rp2      = bj + bi[row];
5330       ap2      = ba + bi[row];
5331       rmax2    = bimax[row];
5332       nrow2    = bilen[row];
5333       low2     = 0;
5334       high2    = nrow2;
5335 
5336       for (j=0; j<n; j++) {
5337         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5338         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5339         if (in[j] >= cstart && in[j] < cend){
5340           col = in[j] - cstart;
5341           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5342         } else if (in[j] < 0) continue;
5343 #if defined(PETSC_USE_DEBUG)
5344         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5345 #endif
5346         else {
5347           if (mat->was_assembled) {
5348             if (!aij->colmap) {
5349               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5350             }
5351 #if defined (PETSC_USE_CTABLE)
5352             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5353 	    col--;
5354 #else
5355             col = aij->colmap[in[j]] - 1;
5356 #endif
5357             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5358               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5359               col =  in[j];
5360               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5361               B = aij->B;
5362               b = (Mat_SeqAIJ*)B->data;
5363               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5364               rp2      = bj + bi[row];
5365               ap2      = ba + bi[row];
5366               rmax2    = bimax[row];
5367               nrow2    = bilen[row];
5368               low2     = 0;
5369               high2    = nrow2;
5370               bm       = aij->B->rmap->n;
5371               ba = b->a;
5372             }
5373           } else col = in[j];
5374           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5375         }
5376       }
5377     } else {
5378       if (!aij->donotstash) {
5379         if (roworiented) {
5380           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5381         } else {
5382           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5383         }
5384       }
5385     }
5386   }}
5387   PetscFunctionReturnVoid();
5388 }
5389 EXTERN_C_END
5390 
5391