xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a3424433cac1fecd9fafe5eee71db3fb754d5af8)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
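/*
   A minimal usage sketch (illustrative only; gmat, mlocal, and dmat are names
   assumed by this example, not part of the routine's interface): rank 0 holds
   an assembled SeqAIJ matrix, every rank passes its desired local row count,
   and each rank receives its slice of the resulting parallel matrix. A later
   call with MAT_REUSE_MATRIX moves fresh numerical values into the same
   distributed matrix without recreating its structure.

     Mat      gmat,dmat;
     PetscInt mlocal;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/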
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
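    /* turn the gathered per-process row counts into cumulative ownership
       offsets, so rowners[r]..rowners[r+1] brackets the rows owned by rank r */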
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal entries in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal entries in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
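    /* Each global row arrives from process 0 as one contiguous run of values:
       first the ld[i] off-diagonal entries whose columns lie to the left of the
       diagonal block (copied into B), then the diagonal-block entries (copied
       into A), then the remaining off-diagonal entries on the right (copied
       into B); ld[] was recorded when the matrix was first distributed. */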
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
180 at a slightly higher hash-table cost; without it, it is not scalable (each
181 processor holds an order-N integer array) although lookups are fast.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
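/*
   The resulting colmap inverts garray: given a global column that appears in
   the off-diagonal block B, it yields the local column index. A sketch of the
   lookup pattern used later in this file (gcol and lcol are illustrative
   names; values are stored shifted by one so that zero can mean "absent"):

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   A resulting lcol of -1 signals that gcol is not present in B.
*/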
205 
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
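/*
   Both macros above implement the same technique: a bisection that narrows the
   search window in the sorted row until at most 5 candidates remain, a short
   linear scan, and on a miss an insertion that shifts the tail of the row up
   one slot. A compact standalone sketch of that idea (illustrative only; the
   names are hypothetical and it omits the reallocation, nonew, and
   ignorezeroentries handling the macros perform):

     static void InsertIntoSortedRow(PetscInt *cols,MatScalar *vals,PetscInt *n,
                                     PetscInt col,MatScalar value,InsertMode addv)
     {
       PetscInt lo = 0,hi = *n,t,i;
       while (hi - lo > 5) {
         t = (lo + hi)/2;
         if (cols[t] > col) hi = t;
         else               lo = t;
       }
       for (i=lo; i<hi; i++) {
         if (cols[i] > col) break;
         if (cols[i] == col) {
           if (addv == ADD_VALUES) vals[i] += value;
           else                    vals[i]  = value;
           return;
         }
       }
       for (t=*n; t>i; t--) { cols[t] = cols[t-1]; vals[t] = vals[t-1]; }
       cols[i] = col; vals[i] = value; (*n)++;
     }
*/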
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
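  /* The caller supplies v holding the entire local row in ascending global
     column order; it splits into three contiguous pieces,
       v = [ B entries left of the diagonal block | A (diagonal) entries | B entries right of it ],
     and the scan over garray below determines l, the length of the first piece. */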
284 
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscTruth     roworiented = aij->roworiented;
311 
312   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscTruth     ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (!aij->donotstash) {
394         if (roworiented) {
395           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
396         } else {
397           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         }
399       }
400     }
401   }
402   PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatGetValues_MPIAIJ"
407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
408 {
409   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
410   PetscErrorCode ierr;
411   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
412   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
413 
414   PetscFunctionBegin;
415   for (i=0; i<m; i++) {
416     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
417     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
418     if (idxm[i] >= rstart && idxm[i] < rend) {
419       row = idxm[i] - rstart;
420       for (j=0; j<n; j++) {
421         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
422         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
423         if (idxn[j] >= cstart && idxn[j] < cend){
424           col = idxn[j] - cstart;
425           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
426         } else {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
429           }
430 #if defined (PETSC_USE_CTABLE)
431           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
432           col --;
433 #else
434           col = aij->colmap[idxn[j]] - 1;
435 #endif
436           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
437           else {
438             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
439           }
440         }
441       }
442     } else {
443       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
444     }
445   }
446   PetscFunctionReturn(0);
447 }
448 
449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscErrorCode ierr;
457   PetscInt       nstash,reallocs;
458   InsertMode     addv;
459 
460   PetscFunctionBegin;
461   if (aij->donotstash) {
462     PetscFunctionReturn(0);
463   }
464 
465   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES, not a mixture */
466   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
467   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
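  /* A worked example of the bitwise-OR reduction above, assuming the usual
     InsertMode values NOT_SET_VALUES == 0, INSERT_VALUES == 1, ADD_VALUES == 2:
     a rank that cached nothing contributes 0, the identity for OR, so it never
     disturbs the result; if all active ranks agree, the OR reproduces their
     common mode; if some inserted and some added, the result is 1|2 == 3 ==
     (ADD_VALUES|INSERT_VALUES), which is exactly what the check above traps. */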
468   mat->insertmode = addv; /* in case this processor had no cache */
469 
470   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
471   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
472   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
478 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
479 {
480   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
481   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
482   PetscErrorCode ierr;
483   PetscMPIInt    n;
484   PetscInt       i,j,rstart,ncols,flg;
485   PetscInt       *row,*col;
486   PetscTruth     other_disassembled;
487   PetscScalar    *val;
488   InsertMode     addv = mat->insertmode;
489 
490   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
491   PetscFunctionBegin;
492   if (!aij->donotstash) {
493     while (1) {
494       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
495       if (!flg) break;
496 
497       for (i=0; i<n;) {
498         /* Now identify the consecutive values belonging to the same row */
499         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
500         if (j < n) ncols = j-i;
501         else       ncols = n-i;
502         /* Now assemble all these values with a single function call */
503         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
504         i = j;
505       }
506     }
507     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
508   }
509   a->compressedrow.use     = PETSC_FALSE;
510   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
511   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
512 
513   /* determine if any processor has disassembled; if so we must
514      also disassemble ourselves, in order that we may reassemble. */
515   /*
516      if the nonzero structure of the submatrix B cannot change then no
517      processor can have disassembled, so we can skip this step
518   */
519   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
520     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
521     if (mat->was_assembled && !other_disassembled) {
522       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523     }
524   }
525   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
526     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
527   }
528   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
529   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
530   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
531   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
532 
533   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
534   aij->rowvalues = 0;
535 
536   /* used by MatAXPY() */
537   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
538   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
539 
540   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
541   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
542   PetscFunctionReturn(0);
543 }
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
547 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
548 {
549   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
550   PetscErrorCode ierr;
551 
552   PetscFunctionBegin;
553   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
554   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 }
557 
558 #undef __FUNCT__
559 #define __FUNCT__ "MatZeroRows_MPIAIJ"
560 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
561 {
562   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
563   PetscErrorCode ierr;
564   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
565   PetscInt       i,*owners = A->rmap->range;
566   PetscInt       *nprocs,j,idx,nsends,row;
567   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
568   PetscInt       *rvalues,count,base,slen,*source;
569   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
570   MPI_Comm       comm = ((PetscObject)A)->comm;
571   MPI_Request    *send_waits,*recv_waits;
572   MPI_Status     recv_status,*send_status;
573 #if defined(PETSC_DEBUG)
574   PetscTruth     found = PETSC_FALSE;
575 #endif
576 
577   PetscFunctionBegin;
578   /*  first count number of contributors to each processor */
579   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
580   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
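  /* nprocs uses an interleaved encoding: nprocs[2*j] counts the rows destined
     for process j, while nprocs[2*j+1] is a 0/1 flag marking whether process j
     receives a message at all; summing the flags yields nsends below, and
     PetscMaxSum() reduces the counts into nmax/nrecvs. */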
581   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
582   j = 0;
583   for (i=0; i<N; i++) {
584     if (lastidx > (idx = rows[i])) j = 0;
585     lastidx = idx;
586     for (; j<size; j++) {
587       if (idx >= owners[j] && idx < owners[j+1]) {
588         nprocs[2*j]++;
589         nprocs[2*j+1] = 1;
590         owner[i] = j;
591 #if defined(PETSC_DEBUG)
592         found = PETSC_TRUE;
593 #endif
594         break;
595       }
596     }
597 #if defined(PETSC_DEBUG)
598     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
599     found = PETSC_FALSE;
600 #endif
601   }
602   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
603 
604   /* inform other processors of the number of messages and the max length */
605   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
606 
607   /* post receives:   */
608   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
609   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
610   for (i=0; i<nrecvs; i++) {
611     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
612   }
613 
614   /* do sends:
615       1) starts[i] gives the starting index in svalues for stuff going to
616          the ith processor
617   */
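  /* For example (illustrative numbers): with 3 processes and per-destination
     counts nprocs[2*i] = {2,0,3}, the loop below yields starts = {0,2,2}, so
     the rows bound for process i are packed contiguously at svalues+starts[i]. */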
618   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
619   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
620   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
621   starts[0] = 0;
622   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
623   for (i=0; i<N; i++) {
624     svalues[starts[owner[i]]++] = rows[i];
625   }
626 
627   starts[0] = 0;
628   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
629   count = 0;
630   for (i=0; i<size; i++) {
631     if (nprocs[2*i+1]) {
632       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
633     }
634   }
635   ierr = PetscFree(starts);CHKERRQ(ierr);
636 
637   base = owners[rank];
638 
639   /*  wait on receives */
640   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
641   count  = nrecvs; slen = 0;
642   while (count) {
643     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
644     /* unpack receives into our local space */
645     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
646     source[imdex]  = recv_status.MPI_SOURCE;
647     lens[imdex]    = n;
648     slen          += n;
649     count--;
650   }
651   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
652 
653   /* unpack the received global row indices into local row numbers */
654   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
655   count = 0;
656   for (i=0; i<nrecvs; i++) {
657     values = rvalues + i*nmax;
658     for (j=0; j<lens[i]; j++) {
659       lrows[count++] = values[j] - base;
660     }
661   }
662   ierr = PetscFree(rvalues);CHKERRQ(ierr);
663   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
664   ierr = PetscFree(owner);CHKERRQ(ierr);
665   ierr = PetscFree(nprocs);CHKERRQ(ierr);
666 
667   /* actually zap the local rows */
668   /*
669         Zero the required rows. If the "diagonal block" of the matrix
670      is square and the user wishes to set the diagonal, we use separate
671      code so that MatSetValues() is not called for each diagonal entry,
672      which would allocate new memory (many mallocs) and slow things down.
673 
674   */
675   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
676   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
677   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
678     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
679   } else if (diag != 0.0) {
680     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
681     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
682       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
683 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
684     }
685     for (i = 0; i < slen; i++) {
686       row  = lrows[i] + rstart;
687       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
688     }
689     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
690     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
691   } else {
692     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
693   }
694   ierr = PetscFree(lrows);CHKERRQ(ierr);
695 
696   /* wait on sends */
697   if (nsends) {
698     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
699     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
700     ierr = PetscFree(send_status);CHKERRQ(ierr);
701   }
702   ierr = PetscFree(send_waits);CHKERRQ(ierr);
703   ierr = PetscFree(svalues);CHKERRQ(ierr);
704 
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatMult_MPIAIJ"
710 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
711 {
712   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
713   PetscErrorCode ierr;
714   PetscInt       nt;
715 
716   PetscFunctionBegin;
717   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
718   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
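  /* Classic overlap of communication with computation: start scattering the
     ghost values of xx into lvec, compute the diagonal-block product
     yy = A*xx while the messages are in flight, then finish the scatter and
     accumulate the off-diagonal contribution yy += B*lvec. */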
719   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
720   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
721   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
722   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
723   PetscFunctionReturn(0);
724 }
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
728 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
729 {
730   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
731   PetscErrorCode ierr;
732 
733   PetscFunctionBegin;
734   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatMultAdd_MPIAIJ"
740 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
741 {
742   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
743   PetscErrorCode ierr;
744 
745   PetscFunctionBegin;
746   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
747   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
748   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
749   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNCT__
754 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
755 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
756 {
757   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
758   PetscErrorCode ierr;
759   PetscTruth     merged;
760 
761   PetscFunctionBegin;
762   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
763   /* do nondiagonal part */
764   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
765   if (!merged) {
766     /* send it on its way */
767     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
768     /* do local part */
769     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
770     /* receive remote parts: note this assumes the values are not actually */
771     /* added into yy until the VecScatterEnd() on the next line */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   } else {
774     /* do local part */
775     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
776     /* send it on its way */
777     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778     /* the values were actually received in the Begin(), but we still must call this no-op End() */
779     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
780   }
781   PetscFunctionReturn(0);
782 }
783 
784 EXTERN_C_BEGIN
785 #undef __FUNCT__
786 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
787 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
788 {
789   MPI_Comm       comm;
790   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
791   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
792   IS             Me,Notme;
793   PetscErrorCode ierr;
794   PetscInt       M,N,first,last,*notme,i;
795   PetscMPIInt    size;
796 
797   PetscFunctionBegin;
798 
799   /* Easy test: the diagonal blocks must be transposes of each other */
800   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
801   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
802   if (!*f) PetscFunctionReturn(0);
803   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
804   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
805   if (size == 1) PetscFunctionReturn(0);
806 
807   /* Hard test: off-diagonal block. This requires a MatGetSubMatrices() call. */
808   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
809   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
810   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
811   for (i=0; i<first; i++) notme[i] = i;
812   for (i=last; i<M; i++) notme[i-last+first] = i;
813   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
814   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
815   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
816   Aoff = Aoffs[0];
817   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
818   Boff = Boffs[0];
819   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
820   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
821   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
822   ierr = ISDestroy(Me);CHKERRQ(ierr);
823   ierr = ISDestroy(Notme);CHKERRQ(ierr);
824 
825   PetscFunctionReturn(0);
826 }
827 EXTERN_C_END
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
831 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
832 {
833   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
834   PetscErrorCode ierr;
835 
836   PetscFunctionBegin;
837   /* do nondiagonal part */
838   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
839   /* send it on its way */
840   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
841   /* do local part */
842   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
843   /* receive remote parts */
844   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 /*
849   This only works correctly for square matrices where the subblock A->A is the
850    diagonal block
851 */
852 #undef __FUNCT__
853 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
854 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
855 {
856   PetscErrorCode ierr;
857   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
858 
859   PetscFunctionBegin;
860   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
861   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
862   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatScale_MPIAIJ"
868 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
869 {
870   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
871   PetscErrorCode ierr;
872 
873   PetscFunctionBegin;
874   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
875   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
876   PetscFunctionReturn(0);
877 }
878 
879 #undef __FUNCT__
880 #define __FUNCT__ "MatDestroy_MPIAIJ"
881 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
882 {
883   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
884   PetscErrorCode ierr;
885 
886   PetscFunctionBegin;
887 #if defined(PETSC_USE_LOG)
888   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
889 #endif
890   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
891   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
892   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
893   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
894 #if defined (PETSC_USE_CTABLE)
895   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
896 #else
897   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
898 #endif
899   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
900   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
901   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
902   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
903   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
904   ierr = PetscFree(aij);CHKERRQ(ierr);
905 
906   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
908   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
909   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
910   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
911   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
912   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
913   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
915   PetscFunctionReturn(0);
916 }
917 
918 #undef __FUNCT__
919 #define __FUNCT__ "MatView_MPIAIJ_Binary"
920 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
921 {
922   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
923   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
924   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
925   PetscErrorCode    ierr;
926   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
927   int               fd;
928   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
929   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
930   PetscScalar       *column_values;
931 
932   PetscFunctionBegin;
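  /* Binary layout written below: a four-entry integer header (MAT_FILE_CLASSID,
     global rows, global columns, total nonzeros), then every row length, then
     every global column index, then every value; each section is funneled to
     process 0 and written in rank order. */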
933   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
934   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
935   nz   = A->nz + B->nz;
936   if (!rank) {
937     header[0] = MAT_FILE_CLASSID;
938     header[1] = mat->rmap->N;
939     header[2] = mat->cmap->N;
940     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
941     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
942     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
943     /* get largest number of rows any processor has */
944     rlen = mat->rmap->n;
945     range = mat->rmap->range;
946     for (i=1; i<size; i++) {
947       rlen = PetscMax(rlen,range[i+1] - range[i]);
948     }
949   } else {
950     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
951     rlen = mat->rmap->n;
952   }
953 
954   /* load up the local row counts */
955   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
956   for (i=0; i<mat->rmap->n; i++) {
957     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
958   }
959 
960   /* store the row lengths to the file */
961   if (!rank) {
962     MPI_Status status;
963     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
964     for (i=1; i<size; i++) {
965       rlen = range[i+1] - range[i];
966       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
967       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
968     }
969   } else {
970     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
971   }
972   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
973 
974   /* load up the local column indices */
975   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
976   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
977   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
978   cnt  = 0;
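    /* Each output row is assembled in ascending global column order by splicing
       three runs: the B (off-diagonal) entries whose global columns precede the
       diagonal block, then the A entries shifted by cstart, then the remaining
       B entries; garray translates local B column indices back to global ones. */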
979   for (i=0; i<mat->rmap->n; i++) {
980     for (j=B->i[i]; j<B->i[i+1]; j++) {
981       if ( (col = garray[B->j[j]]) > cstart) break;
982       column_indices[cnt++] = col;
983     }
984     for (k=A->i[i]; k<A->i[i+1]; k++) {
985       column_indices[cnt++] = A->j[k] + cstart;
986     }
987     for (; j<B->i[i+1]; j++) {
988       column_indices[cnt++] = garray[B->j[j]];
989     }
990   }
991   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
992 
993   /* store the column indices to the file */
994   if (!rank) {
995     MPI_Status status;
996     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
997     for (i=1; i<size; i++) {
998       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
999       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1000       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1001       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1002     }
1003   } else {
1004     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1005     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1006   }
1007   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1008 
1009   /* load up the local column values */
1010   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1011   cnt  = 0;
1012   for (i=0; i<mat->rmap->n; i++) {
1013     for (j=B->i[i]; j<B->i[i+1]; j++) {
1014       if ( garray[B->j[j]] > cstart) break;
1015       column_values[cnt++] = B->a[j];
1016     }
1017     for (k=A->i[i]; k<A->i[i+1]; k++) {
1018       column_values[cnt++] = A->a[k];
1019     }
1020     for (; j<B->i[i+1]; j++) {
1021       column_values[cnt++] = B->a[j];
1022     }
1023   }
1024   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1025 
1026   /* store the column values to the file */
1027   if (!rank) {
1028     MPI_Status status;
1029     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1030     for (i=1; i<size; i++) {
1031       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1032       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1033       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1034       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1035     }
1036   } else {
1037     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1038     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1039   }
1040   ierr = PetscFree(column_values);CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1046 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1047 {
1048   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1049   PetscErrorCode    ierr;
1050   PetscMPIInt       rank = aij->rank,size = aij->size;
1051   PetscTruth        isdraw,iascii,isbinary;
1052   PetscViewer       sviewer;
1053   PetscViewerFormat format;
1054 
1055   PetscFunctionBegin;
1056   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1057   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1058   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1059   if (iascii) {
1060     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1061     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1062       MatInfo    info;
1063       PetscTruth inodes;
1064 
1065       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1066       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1067       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1068       if (!inodes) {
1069         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1070 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1071       } else {
1072         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1073 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1074       }
1075       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1076       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1077       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1078       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1079       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1080       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1081       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1082       PetscFunctionReturn(0);
1083     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1084       PetscInt   inodecount,inodelimit,*inodes;
1085       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1086       if (inodes) {
1087         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1088       } else {
1089         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1090       }
1091       PetscFunctionReturn(0);
1092     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1093       PetscFunctionReturn(0);
1094     }
1095   } else if (isbinary) {
1096     if (size == 1) {
1097       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1098       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1099     } else {
1100       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1101     }
1102     PetscFunctionReturn(0);
1103   } else if (isdraw) {
1104     PetscDraw  draw;
1105     PetscTruth isnull;
1106     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1107     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1108   }
1109 
1110   if (size == 1) {
1111     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1112     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1113   } else {
1114     /* assemble the entire matrix onto first processor. */
1115     Mat         A;
1116     Mat_SeqAIJ  *Aloc;
1117     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1118     MatScalar   *a;
1119 
1120     if (mat->rmap->N > 1024) {
1121       PetscTruth flg = PETSC_FALSE;
1122 
1123       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1124       if (!flg) {
1125         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1126       }
1127     }
1128 
1129     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1130     if (!rank) {
1131       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1132     } else {
1133       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1134     }
1135     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1136     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1137     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1138     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1139 
1140     /* copy over the A part */
1141     Aloc = (Mat_SeqAIJ*)aij->A->data;
1142     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1143     row = mat->rmap->rstart;
1144     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1148     }
1149     aj = Aloc->j;
1150     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1151 
1152     /* copy over the B part */
1153     Aloc = (Mat_SeqAIJ*)aij->B->data;
1154     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1155     row  = mat->rmap->rstart;
1156     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1157     ct   = cols;
1158     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1159     for (i=0; i<m; i++) {
1160       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1161       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1162     }
1163     ierr = PetscFree(ct);CHKERRQ(ierr);
1164     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1165     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1166     /*
1167        Everyone has to call to draw the matrix since the graphics waits are
1168        synchronized across all processors that share the PetscDraw object
1169     */
1170     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1171     if (!rank) {
1172       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1173       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1174     }
1175     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1176     ierr = MatDestroy(A);CHKERRQ(ierr);
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatView_MPIAIJ"
1183 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1184 {
1185   PetscErrorCode ierr;
1186   PetscTruth     iascii,isdraw,issocket,isbinary;
1187 
1188   PetscFunctionBegin;
1189   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1190   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1191   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1192   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1193   if (iascii || isdraw || isbinary || issocket) {
1194     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1195   } else {
1196     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1197   }
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSOR_MPIAIJ"
1203 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1204 {
1205   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1206   PetscErrorCode ierr;
1207   Vec            bb1 = 0;
1208   PetscTruth     hasop;
1209 
1210   PetscFunctionBegin;
1211   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1212     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1213   }
1214 
1215   if (flag == SOR_APPLY_UPPER) {
1216     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1217     PetscFunctionReturn(0);
1218   }
1219 
1220   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1221     if (flag & SOR_ZERO_INITIAL_GUESS) {
1222       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1223       its--;
1224     }
1225 
1226     while (its--) {
1227       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1228       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229 
1230       /* update rhs: bb1 = bb - B*x */
1231       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1232       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1233 
1234       /* local sweep */
1235       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1236     }
1237   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1238     if (flag & SOR_ZERO_INITIAL_GUESS) {
1239       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1240       its--;
1241     }
1242     while (its--) {
1243       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245 
1246       /* update rhs: bb1 = bb - B*x */
1247       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1248       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1249 
1250       /* local sweep */
1251       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1252     }
1253   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1254     if (flag & SOR_ZERO_INITIAL_GUESS) {
1255       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1256       its--;
1257     }
1258     while (its--) {
1259       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1260       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261 
1262       /* update rhs: bb1 = bb - B*x */
1263       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1264       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1265 
1266       /* local sweep */
1267       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1268     }
1269   }  else if (flag & SOR_EISENSTAT) {
1270     Vec         xx1;
1271 
1272     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1273     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1274 
1275     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1277     if (!mat->diag) {
1278       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1279       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1280     }
1281     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1282     if (hasop) {
1283       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1284     } else {
1285       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1286     }
1287     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1288 
1289     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1290 
1291     /* local sweep */
1292     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1293     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1294     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1295   } else {
1296     SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1297   }
1298 
1299   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1300   PetscFunctionReturn(0);
1301 }
1302 
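/*
   Usage sketch (illustrative only; assumes conforming parallel A, b, and x):
   two processor-local symmetric SOR sweeps with a zero initial guess, issued
   through the public interface that dispatches to MatSOR_MPIAIJ() above.

      ierr = MatSOR(A,b,1.0,
                    (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                    0.0,2,1,x);CHKERRQ(ierr);

   Each global iteration scatters the current xx into the ghost vector lvec,
   forms bb1 = bb - B*lvec, and then sweeps only on the local diagonal block.
*/
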
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatPermute_MPIAIJ"
1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1306 {
1307   MPI_Comm       comm,pcomm;
1308   PetscInt       first,local_size,nrows;
1309   const PetscInt *rows;
1310   PetscMPIInt    size;
1311   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1316   /* make a collective version of 'rowp' */
1317   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1318   if (pcomm==comm) {
1319     crowp = rowp;
1320   } else {
1321     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1322     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1323     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1324     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1325   }
1326   /* collect the global row permutation and invert it */
1327   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1328   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1329   if (pcomm!=comm) {
1330     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1331   }
1332   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1333   /* get the local target indices */
1334   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1335   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1336   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1337   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1338   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1339   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1340   /* the column permutation is so much easier;
1341      make a local version of 'colp' and invert it */
1342   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1343   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1344   if (size==1) {
1345     lcolp = colp;
1346   } else {
1347     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1348     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1349     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1350   }
1351   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1352   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1353   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1354   if (size>1) {
1355     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1357   }
1358   /* now we just get the submatrix */
1359   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1360   /* clean up */
1361   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1362   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
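/*
   Usage sketch (illustrative; the index arrays are made up): reverse the rows
   and columns of a 4x4 parallel matrix on two processes, two rows each. Each
   process supplies its locally owned rows of the global row permutation and a
   full (local) column permutation, following ISSetPermutation() conventions.

      IS       rowp,colp;
      PetscInt rows[2],cols[4] = {3,2,1,0},first;

      ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
      rows[0] = 3 - first; rows[1] = 2 - first;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&rowp);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,4,cols,&colp);CHKERRQ(ierr);
      ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/
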
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369 {
1370   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1371   Mat            A = mat->A,B = mat->B;
1372   PetscErrorCode ierr;
1373   PetscReal      isend[5],irecv[5];
1374 
1375   PetscFunctionBegin;
1376   info->block_size     = 1.0;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   }
1404   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1405   info->fill_ratio_needed = 0;
1406   info->factor_mallocs    = 0;
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
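/*
   Usage sketch (illustrative): query the total number of nonzeros actually
   stored across all processes; the routine above sums the diagonal (A) and
   off-diagonal (B) parts before reducing over the communicator.

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(((PetscObject)A)->comm,"nz_used %G\n",info.nz_used);CHKERRQ(ierr);
*/
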
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatSetOption_MPIAIJ"
1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1414 {
1415   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   switch (op) {
1420   case MAT_NEW_NONZERO_LOCATIONS:
1421   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1422   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1423   case MAT_KEEP_NONZERO_PATTERN:
1424   case MAT_NEW_NONZERO_LOCATION_ERR:
1425   case MAT_USE_INODES:
1426   case MAT_IGNORE_ZERO_ENTRIES:
1427     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1428     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1429     break;
1430   case MAT_ROW_ORIENTED:
1431     a->roworiented = flg;
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1434     break;
1435   case MAT_NEW_DIAGONALS:
1436     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1437     break;
1438   case MAT_IGNORE_OFF_PROC_ENTRIES:
1439     a->donotstash = PETSC_TRUE;
1440     break;
1441   case MAT_SYMMETRIC:
1442     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1443     break;
1444   case MAT_STRUCTURALLY_SYMMETRIC:
1445     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1446     break;
1447   case MAT_HERMITIAN:
1448     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1449     break;
1450   case MAT_SYMMETRY_ETERNAL:
1451     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1452     break;
1453   default:
1454     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatGetRow_MPIAIJ"
1461 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1462 {
1463   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1464   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1465   PetscErrorCode ierr;
1466   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1467   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1468   PetscInt       *cmap,*idx_p;
1469 
1470   PetscFunctionBegin;
1471   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1472   mat->getrowactive = PETSC_TRUE;
1473 
1474   if (!mat->rowvalues && (idx || v)) {
1475     /*
1476         allocate enough space to hold information from the longest row.
1477     */
1478     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1479     PetscInt   max = 1,tmp;
1480     for (i=0; i<matin->rmap->n; i++) {
1481       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1482       if (max < tmp) { max = tmp; }
1483     }
1484     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1485   }
1486 
1487   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1488   lrow = row - rstart;
1489 
1490   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1491   if (!v)   {pvA = 0; pvB = 0;}
1492   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1493   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1494   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1495   nztot = nzA + nzB;
1496 
1497   cmap  = mat->garray;
1498   if (v  || idx) {
1499     if (nztot) {
1500       /* Sort by increasing column numbers, assuming A and B already sorted */
1501       PetscInt imark = -1;
1502       if (v) {
1503         *v = v_p = mat->rowvalues;
1504         for (i=0; i<nzB; i++) {
1505           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1506           else break;
1507         }
1508         imark = i;
1509         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1510         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1511       }
1512       if (idx) {
1513         *idx = idx_p = mat->rowindices;
1514         if (imark > -1) {
1515           for (i=0; i<imark; i++) {
1516             idx_p[i] = cmap[cworkB[i]];
1517           }
1518         } else {
1519           for (i=0; i<nzB; i++) {
1520             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1521             else break;
1522           }
1523           imark = i;
1524         }
1525         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1526         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1527       }
1528     } else {
1529       if (idx) *idx = 0;
1530       if (v)   *v   = 0;
1531     }
1532   }
1533   *nz = nztot;
1534   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1535   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1541 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1542 {
1543   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1544 
1545   PetscFunctionBegin;
1546   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1547   aij->getrowactive = PETSC_FALSE;
1548   PetscFunctionReturn(0);
1549 }
1550 
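/*
   Typical use of the row access pair above (sketch; only locally owned rows
   may be requested, and MatRestoreRow() must follow every MatGetRow()):

      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;

      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        /* inspect ncols, cols[], vals[] for this row */
        ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/
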
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatNorm_MPIAIJ"
1553 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1554 {
1555   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1556   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1557   PetscErrorCode ierr;
1558   PetscInt       i,j,cstart = mat->cmap->rstart;
1559   PetscReal      sum = 0.0;
1560   MatScalar      *v;
1561 
1562   PetscFunctionBegin;
1563   if (aij->size == 1) {
1564     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1565   } else {
1566     if (type == NORM_FROBENIUS) {
1567       v = amat->a;
1568       for (i=0; i<amat->nz; i++) {
1569 #if defined(PETSC_USE_COMPLEX)
1570         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1571 #else
1572         sum += (*v)*(*v); v++;
1573 #endif
1574       }
1575       v = bmat->a;
1576       for (i=0; i<bmat->nz; i++) {
1577 #if defined(PETSC_USE_COMPLEX)
1578         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1579 #else
1580         sum += (*v)*(*v); v++;
1581 #endif
1582       }
1583       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1584       *norm = sqrt(*norm);
1585     } else if (type == NORM_1) { /* max column norm */
1586       PetscReal *tmp,*tmp2;
1587       PetscInt  *jj,*garray = aij->garray;
1588       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1589       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1590       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1591       *norm = 0.0;
1592       v = amat->a; jj = amat->j;
1593       for (j=0; j<amat->nz; j++) {
1594         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1595       }
1596       v = bmat->a; jj = bmat->j;
1597       for (j=0; j<bmat->nz; j++) {
1598         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1599       }
1600       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1601       for (j=0; j<mat->cmap->N; j++) {
1602         if (tmp2[j] > *norm) *norm = tmp2[j];
1603       }
1604       ierr = PetscFree(tmp);CHKERRQ(ierr);
1605       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1606     } else if (type == NORM_INFINITY) { /* max row norm */
1607       PetscReal ntemp = 0.0;
1608       for (j=0; j<aij->A->rmap->n; j++) {
1609         v = amat->a + amat->i[j];
1610         sum = 0.0;
1611         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1612           sum += PetscAbsScalar(*v); v++;
1613         }
1614         v = bmat->a + bmat->i[j];
1615         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1616           sum += PetscAbsScalar(*v); v++;
1617         }
1618         if (sum > ntemp) ntemp = sum;
1619       }
1620       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1621     } else {
1622       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1623     }
1624   }
1625   PetscFunctionReturn(0);
1626 }
1627 
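/*
   Usage sketch (illustrative): the three norms supported above; NORM_2 is
   refused because the spectral norm cannot be computed entrywise.

      PetscReal nrm;
      ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
      ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);        /* max column sum */
      ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr); /* max row sum */
*/
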
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatTranspose_MPIAIJ"
1630 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1631 {
1632   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1633   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1634   PetscErrorCode ierr;
1635   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1636   PetscInt       cstart=A->cmap->rstart,ncol;
1637   Mat            B;
1638   MatScalar      *array;
1639 
1640   PetscFunctionBegin;
1641   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1642 
1643   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1644   ai = Aloc->i; aj = Aloc->j;
1645   bi = Bloc->i; bj = Bloc->j;
1646   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1647     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1648     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1649     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1650     for (i=0; i<ai[ma]; i++){
1651       d_nnz[aj[i]] ++;
1652       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1653     }
1654 
1655     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1656     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1657     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1658     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1659     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1660   } else {
1661     B = *matout;
1662   }
1663 
1664   /* copy over the A part */
1665   array = Aloc->a;
1666   row = A->rmap->rstart;
1667   for (i=0; i<ma; i++) {
1668     ncol = ai[i+1]-ai[i];
1669     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1670     row++; array += ncol; aj += ncol;
1671   }
1672   aj = Aloc->j;
1673   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1674 
1675   /* copy over the B part */
1676   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1677   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1678   array = Bloc->a;
1679   row = A->rmap->rstart;
1680   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1681   cols_tmp = cols;
1682   for (i=0; i<mb; i++) {
1683     ncol = bi[i+1]-bi[i];
1684     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1685     row++; array += ncol; cols_tmp += ncol;
1686   }
1687   ierr = PetscFree(cols);CHKERRQ(ierr);
1688 
1689   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1692     *matout = B;
1693   } else {
1694     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1695   }
1696   PetscFunctionReturn(0);
1697 }
1698 
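/*
   Usage sketch (illustrative): out-of-place and in-place transposes as
   handled by the routine above; in-place reuse requires a square matrix.

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/
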
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1701 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1702 {
1703   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1704   Mat            a = aij->A,b = aij->B;
1705   PetscErrorCode ierr;
1706   PetscInt       s1,s2,s3;
1707 
1708   PetscFunctionBegin;
1709   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1710   if (rr) {
1711     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1712     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1713     /* Overlap communication with computation. */
1714     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1715   }
1716   if (ll) {
1717     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1718     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1719     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1720   }
1721   /* scale the diagonal block */
1722   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1723 
1724   if (rr) {
1725     /* Do a scatter end and then right scale the off-diagonal block */
1726     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1727     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1728   }
1729 
1730   PetscFunctionReturn(0);
1731 }
1732 
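/*
   Usage sketch (illustrative): MatDiagonalScale() computes
   mat = diag(ll) * mat * diag(rr); pass PETSC_NULL to skip one side.

      ierr = MatDiagonalScale(mat,l,PETSC_NULL);CHKERRQ(ierr); /* row scaling only */
*/
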
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1735 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1736 {
1737   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1738   PetscErrorCode ierr;
1739 
1740   PetscFunctionBegin;
1741   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1742   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1743   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1744   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1745   PetscFunctionReturn(0);
1746 }
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1749 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1750 {
1751   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1752   PetscErrorCode ierr;
1753 
1754   PetscFunctionBegin;
1755   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatEqual_MPIAIJ"
1761 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1762 {
1763   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1764   Mat            a,b,c,d;
1765   PetscTruth     flg;
1766   PetscErrorCode ierr;
1767 
1768   PetscFunctionBegin;
1769   a = matA->A; b = matA->B;
1770   c = matB->A; d = matB->B;
1771 
1772   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1773   if (flg) {
1774     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1775   }
1776   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatCopy_MPIAIJ"
1782 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1783 {
1784   PetscErrorCode ierr;
1785   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1786   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1787 
1788   PetscFunctionBegin;
1789   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1790   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1791     /* Because of the column compression in the off-process part a->B,
1792        the number of columns in a->B and b->B may differ, so we cannot call
1793        MatCopy() directly on the two parts. If need be, a copy more efficient
1794        than MatCopy_Basic() could first uncompress the a->B matrices and
1795        then copy the submatrices. */
1796     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1797   } else {
1798     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1799     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1800   }
1801   PetscFunctionReturn(0);
1802 }
1803 
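/*
   Usage sketch (illustrative): the fast path above requires matrices with
   identical layout and implementation, e.g. as produced by MatDuplicate();
   otherwise values are moved row by row through MatCopy_Basic().

      Mat B;
      ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
      ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
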
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1806 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1807 {
1808   PetscErrorCode ierr;
1809 
1810   PetscFunctionBegin;
1811   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatAXPY_MPIAIJ"
1817 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1818 {
1819   PetscErrorCode ierr;
1820   PetscInt       i;
1821   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1822   PetscBLASInt   bnz,one=1;
1823   Mat_SeqAIJ     *x,*y;
1824 
1825   PetscFunctionBegin;
1826   if (str == SAME_NONZERO_PATTERN) {
1827     PetscScalar alpha = a;
1828     x = (Mat_SeqAIJ *)xx->A->data;
1829     y = (Mat_SeqAIJ *)yy->A->data;
1830     bnz = PetscBLASIntCast(x->nz);
1831     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1832     x = (Mat_SeqAIJ *)xx->B->data;
1833     y = (Mat_SeqAIJ *)yy->B->data;
1834     bnz = PetscBLASIntCast(x->nz);
1835     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1836   } else if (str == SUBSET_NONZERO_PATTERN) {
1837     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1838 
1839     x = (Mat_SeqAIJ *)xx->B->data;
1840     y = (Mat_SeqAIJ *)yy->B->data;
1841     if (y->xtoy && y->XtoY != xx->B) {
1842       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1843       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1844     }
1845     if (!y->xtoy) { /* get xtoy */
1846       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1847       y->XtoY = xx->B;
1848       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1849     }
1850     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1851   } else {
1852     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1853   }
1854   PetscFunctionReturn(0);
1855 }
1856 
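/*
   Usage sketch (illustrative): Y = Y + a*X; the MatStructure flag selects the
   BLAS fast path, the subset index mapping, or the general fallback above.

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
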
1857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatConjugate_MPIAIJ"
1861 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1862 {
1863 #if defined(PETSC_USE_COMPLEX)
1864   PetscErrorCode ierr;
1865   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1866 
1867   PetscFunctionBegin;
1868   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1869   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1870 #else
1871   PetscFunctionBegin;
1872 #endif
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatRealPart_MPIAIJ"
1878 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1879 {
1880   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1881   PetscErrorCode ierr;
1882 
1883   PetscFunctionBegin;
1884   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1885   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 #undef __FUNCT__
1890 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1891 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1892 {
1893   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1894   PetscErrorCode ierr;
1895 
1896   PetscFunctionBegin;
1897   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1898   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 #ifdef PETSC_HAVE_PBGL
1903 
1904 #include <boost/parallel/mpi/bsp_process_group.hpp>
1905 #include <boost/graph/distributed/ilu_default_graph.hpp>
1906 #include <boost/graph/distributed/ilu_0_block.hpp>
1907 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1908 #include <boost/graph/distributed/petsc/interface.hpp>
1909 #include <boost/multi_array.hpp>
1910 #include <boost/parallel/distributed_property_map.hpp>
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1914 /*
1915   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1916 */
1917 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1918 {
1919   namespace petsc = boost::distributed::petsc;
1920 
1921   namespace graph_dist = boost::graph::distributed;
1922   using boost::graph::distributed::ilu_default::process_group_type;
1923   using boost::graph::ilu_permuted;
1924 
1925   PetscTruth      row_identity, col_identity;
1926   PetscContainer  c;
1927   PetscInt        m, n, M, N;
1928   PetscErrorCode  ierr;
1929 
1930   PetscFunctionBegin;
1931   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1932   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1933   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1934   if (!row_identity || !col_identity) {
1935     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1936   }
1937 
1938   process_group_type pg;
1939   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1940   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1941   lgraph_type&   level_graph = *lgraph_p;
1942   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1943 
1944   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1945   ilu_permuted(level_graph);
1946 
1947   /* put together the new matrix */
1948   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1949   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1950   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1951   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1952   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1953   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1954   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1955 
1956   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1957   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1958   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1964 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1965 {
1966   PetscFunctionBegin;
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatSolve_MPIAIJ"
1972 /*
1973   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1974 */
1975 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1976 {
1977   namespace graph_dist = boost::graph::distributed;
1978 
1979   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1980   lgraph_type*   lgraph_p;
1981   PetscContainer c;
1982   PetscErrorCode ierr;
1983 
1984   PetscFunctionBegin;
1985   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1986   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1987   ierr = VecCopy(b, x);CHKERRQ(ierr);
1988 
1989   PetscScalar* array_x;
1990   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1991   PetscInt sx;
1992   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1993 
1994   PetscScalar* array_b;
1995   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1996   PetscInt sb;
1997   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1998 
1999   lgraph_type&   level_graph = *lgraph_p;
2000   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2001 
2002   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2003   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2004                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2005 
2006   typedef boost::iterator_property_map<array_ref_type::iterator,
2007                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2008   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2009                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2010 
2011   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2012 
2013   PetscFunctionReturn(0);
2014 }
2015 #endif
2016 
2017 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2018   PetscInt       nzlocal,nsends,nrecvs;
2019   PetscMPIInt    *send_rank,*recv_rank;
2020   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2021   PetscScalar    *sbuf_a,**rbuf_a;
2022   PetscErrorCode (*MatDestroy)(Mat);
2023 } Mat_Redundant;
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2027 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2028 {
2029   PetscErrorCode       ierr;
2030   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2031   PetscInt             i;
2032 
2033   PetscFunctionBegin;
2034   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2035   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2036   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2037   for (i=0; i<redund->nrecvs; i++){
2038     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2039     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2040   }
2041   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2042   ierr = PetscFree(redund);CHKERRQ(ierr);
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatDestroy_MatRedundant"
2048 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2049 {
2050   PetscErrorCode  ierr;
2051   PetscContainer  container;
2052   Mat_Redundant   *redund=PETSC_NULL;
2053 
2054   PetscFunctionBegin;
2055   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2056   if (container) {
2057     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2058   } else {
2059     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2060   }
2061   A->ops->destroy = redund->MatDestroy;
2062   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2063   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2064   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2070 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2071 {
2072   PetscMPIInt    rank,size;
2073   MPI_Comm       comm=((PetscObject)mat)->comm;
2074   PetscErrorCode ierr;
2075   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2076   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2077   PetscInt       *rowrange=mat->rmap->range;
2078   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2079   Mat            A=aij->A,B=aij->B,C=*matredundant;
2080   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2081   PetscScalar    *sbuf_a;
2082   PetscInt       nzlocal=a->nz+b->nz;
2083   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2084   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2085   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2086   MatScalar      *aworkA,*aworkB;
2087   PetscScalar    *vals;
2088   PetscMPIInt    tag1,tag2,tag3,imdex;
2089   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2090                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2091   MPI_Status     recv_status,*send_status;
2092   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2093   PetscInt       **rbuf_j=PETSC_NULL;
2094   PetscScalar    **rbuf_a=PETSC_NULL;
2095   Mat_Redundant  *redund=PETSC_NULL;
2096   PetscContainer container;
2097 
2098   PetscFunctionBegin;
2099   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2100   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2101 
2102   if (reuse == MAT_REUSE_MATRIX) {
2103     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2104     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2105     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2106     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2107     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2108     if (container) {
2109       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2110     } else {
2111       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2112     }
2113     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2114 
2115     nsends    = redund->nsends;
2116     nrecvs    = redund->nrecvs;
2117     send_rank = redund->send_rank;
2118     recv_rank = redund->recv_rank;
2119     sbuf_nz   = redund->sbuf_nz;
2120     rbuf_nz   = redund->rbuf_nz;
2121     sbuf_j    = redund->sbuf_j;
2122     sbuf_a    = redund->sbuf_a;
2123     rbuf_j    = redund->rbuf_j;
2124     rbuf_a    = redund->rbuf_a;
2125   }
2126 
2127   if (reuse == MAT_INITIAL_MATRIX){
2128     PetscMPIInt  subrank,subsize;
2129     PetscInt     nleftover,np_subcomm;
2130     /* get the destination processors' id send_rank, nsends and nrecvs */
2131     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2132     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2133     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2134     np_subcomm = size/nsubcomm;
2135     nleftover  = size - nsubcomm*np_subcomm;
2136     nsends = 0; nrecvs = 0;
2137     for (i=0; i<size; i++){ /* i=rank*/
2138       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2139         send_rank[nsends] = i; nsends++;
2140         recv_rank[nrecvs++] = i;
2141       }
2142     }
2143     if (rank >= size - nleftover){/* this proc is a leftover processor */
2144       i = size-nleftover-1;
2145       j = 0;
2146       while (j < nsubcomm - nleftover){
2147         send_rank[nsends++] = i;
2148         i--; j++;
2149       }
2150     }
2151 
2152     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2153       for (i=0; i<nleftover; i++){
2154         recv_rank[nrecvs++] = size-nleftover+i;
2155       }
2156     }
2157 
2158     /* allocate sbuf_j, sbuf_a */
2159     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2160     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2161     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2162   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2163 
2164   /* copy mat's local entries into the buffers */
2165   if (reuse == MAT_INITIAL_MATRIX){
2166     rownz_max = 0;
2167     rptr = sbuf_j;
2168     cols = sbuf_j + rend-rstart + 1;
2169     vals = sbuf_a;
2170     rptr[0] = 0;
2171     for (i=0; i<rend-rstart; i++){
2172       row = i + rstart;
2173       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2174       ncols  = nzA + nzB;
2175       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2176       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2177       /* load the column indices for this row into cols */
2178       lwrite = 0;
2179       for (l=0; l<nzB; l++) {
2180         if ((ctmp = bmap[cworkB[l]]) < cstart){
2181           vals[lwrite]   = aworkB[l];
2182           cols[lwrite++] = ctmp;
2183         }
2184       }
2185       for (l=0; l<nzA; l++){
2186         vals[lwrite]   = aworkA[l];
2187         cols[lwrite++] = cstart + cworkA[l];
2188       }
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) >= cend){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       vals += ncols;
2196       cols += ncols;
2197       rptr[i+1] = rptr[i] + ncols;
2198       if (rownz_max < ncols) rownz_max = ncols;
2199     }
2200     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2201   } else { /* only copy matrix values into sbuf_a */
2202     rptr = sbuf_j;
2203     vals = sbuf_a;
2204     rptr[0] = 0;
2205     for (i=0; i<rend-rstart; i++){
2206       row = i + rstart;
2207       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2208       ncols  = nzA + nzB;
2209       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2210       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2211       lwrite = 0;
2212       for (l=0; l<nzB; l++) {
2213         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2214       }
2215       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2216       for (l=0; l<nzB; l++) {
2217         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2218       }
2219       vals += ncols;
2220       rptr[i+1] = rptr[i] + ncols;
2221     }
2222   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2223 
2224   /* send nzlocal to others, and recv other's nzlocal */
2225   /*--------------------------------------------------*/
2226   if (reuse == MAT_INITIAL_MATRIX){
2227     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2228     s_waits2 = s_waits3 + nsends;
2229     s_waits1 = s_waits2 + nsends;
2230     r_waits1 = s_waits1 + nsends;
2231     r_waits2 = r_waits1 + nrecvs;
2232     r_waits3 = r_waits2 + nrecvs;
2233   } else {
2234     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2235     r_waits3 = s_waits3 + nsends;
2236   }
2237 
2238   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2239   if (reuse == MAT_INITIAL_MATRIX){
2240     /* get new tags to keep the communication clean */
2241     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2242     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2243     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2244 
2245     /* post receives of other's nzlocal */
2246     for (i=0; i<nrecvs; i++){
2247       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2248     }
2249     /* send nzlocal to others */
2250     for (i=0; i<nsends; i++){
2251       sbuf_nz[i] = nzlocal;
2252       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2253     }
2254     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2255     count = nrecvs;
2256     while (count) {
2257       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2258       recv_rank[imdex] = recv_status.MPI_SOURCE;
2259       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2260       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2261 
2262       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2263       rbuf_nz[imdex] += i + 2;
2264       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2265       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2266       count--;
2267     }
2268     /* wait on sends of nzlocal */
2269     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2270     /* send mat->i,j to others, and recv from others */
2271     /*------------------------------------------------*/
2272     for (i=0; i<nsends; i++){
2273       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2274       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2275     }
2276     /* wait on receives of mat->i,j */
2277     /*------------------------------*/
2278     count = nrecvs;
2279     while (count) {
2280       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2281       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2282       count--;
2283     }
2284     /* wait on sends of mat->i,j */
2285     /*---------------------------*/
2286     if (nsends) {
2287       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2288     }
2289   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2290 
2291   /* post receives, send and receive mat->a */
2292   /*----------------------------------------*/
2293   for (imdex=0; imdex<nrecvs; imdex++) {
2294     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2295   }
2296   for (i=0; i<nsends; i++){
2297     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2298   }
2299   count = nrecvs;
2300   while (count) {
2301     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2302     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2303     count--;
2304   }
2305   if (nsends) {
2306     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2307   }
2308 
2309   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2310 
2311   /* create redundant matrix */
2312   /*-------------------------*/
2313   if (reuse == MAT_INITIAL_MATRIX){
2314     /* compute rownz_max for preallocation */
2315     for (imdex=0; imdex<nrecvs; imdex++){
2316       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2317       rptr = rbuf_j[imdex];
2318       for (i=0; i<j; i++){
2319         ncols = rptr[i+1] - rptr[i];
2320         if (rownz_max < ncols) rownz_max = ncols;
2321       }
2322     }
2323 
2324     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2325     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2326     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2327     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2328     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2329   } else {
2330     C = *matredundant;
2331   }
2332 
2333   /* insert local matrix entries */
2334   rptr = sbuf_j;
2335   cols = sbuf_j + rend-rstart + 1;
2336   vals = sbuf_a;
2337   for (i=0; i<rend-rstart; i++){
2338     row   = i + rstart;
2339     ncols = rptr[i+1] - rptr[i];
2340     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2341     vals += ncols;
2342     cols += ncols;
2343   }
2344   /* insert received matrix entries */
2345   for (imdex=0; imdex<nrecvs; imdex++){
2346     rstart = rowrange[recv_rank[imdex]];
2347     rend   = rowrange[recv_rank[imdex]+1];
2348     rptr = rbuf_j[imdex];
2349     cols = rbuf_j[imdex] + rend-rstart + 1;
2350     vals = rbuf_a[imdex];
2351     for (i=0; i<rend-rstart; i++){
2352       row   = i + rstart;
2353       ncols = rptr[i+1] - rptr[i];
2354       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2355       vals += ncols;
2356       cols += ncols;
2357     }
2358   }
2359   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2361   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2362   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2363   if (reuse == MAT_INITIAL_MATRIX){
2364     PetscContainer container;
2365     *matredundant = C;
2366     /* create a supporting struct and attach it to C for reuse */
2367     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2368     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2369     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2370     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2371     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2372 
2373     redund->nzlocal = nzlocal;
2374     redund->nsends  = nsends;
2375     redund->nrecvs  = nrecvs;
2376     redund->send_rank = send_rank;
2377     redund->recv_rank = recv_rank;
2378     redund->sbuf_nz = sbuf_nz;
2379     redund->rbuf_nz = rbuf_nz;
2380     redund->sbuf_j  = sbuf_j;
2381     redund->sbuf_a  = sbuf_a;
2382     redund->rbuf_j  = rbuf_j;
2383     redund->rbuf_a  = rbuf_a;
2384 
2385     redund->MatDestroy = C->ops->destroy;
2386     C->ops->destroy    = MatDestroy_MatRedundant;
2387   }
2388   PetscFunctionReturn(0);
2389 }
2390 
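/*
   Usage sketch (illustrative; creation of subcomm and choice of mlocal_sub
   are elided): gather a complete copy of a parallel matrix onto each of
   nsubcomm subcommunicators, e.g. so every subgroup can run its own solve.

      Mat redundant;
      ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,
                                   MAT_INITIAL_MATRIX,&redundant);CHKERRQ(ierr);
      /* ... later, refresh only the numerical values ... */
      ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,
                                   MAT_REUSE_MATRIX,&redundant);CHKERRQ(ierr);
*/
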
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2393 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2394 {
2395   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2396   PetscErrorCode ierr;
2397   PetscInt       i,*idxb = 0;
2398   PetscScalar    *va,*vb;
2399   Vec            vtmp;
2400 
2401   PetscFunctionBegin;
2402   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2403   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2404   if (idx) {
2405     for (i=0; i<A->rmap->n; i++) {
2406       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2407     }
2408   }
2409 
2410   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2411   if (idx) {
2412     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2413   }
2414   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2415   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2416 
2417   for (i=0; i<A->rmap->n; i++){
2418     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2419       va[i] = vb[i];
2420       if (idx) idx[i] = a->garray[idxb[i]];
2421     }
2422   }
2423 
2424   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2425   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2426   if (idxb) {
2427     ierr = PetscFree(idxb);CHKERRQ(ierr);
2428   }
2429   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 #undef __FUNCT__
2434 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2435 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2436 {
2437   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2438   PetscErrorCode ierr;
2439   PetscInt       i,*idxb = 0;
2440   PetscScalar    *va,*vb;
2441   Vec            vtmp;
2442 
2443   PetscFunctionBegin;
2444   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2445   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2446   if (idx) {
2447     for (i=0; i<A->rmap->n; i++) {
2448       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2449     }
2450   }
2451 
2452   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2453   if (idx) {
2454     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2455   }
2456   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2457   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2458 
2459   for (i=0; i<A->rmap->n; i++){
2460     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2461       va[i] = vb[i];
2462       if (idx) idx[i] = a->garray[idxb[i]];
2463     }
2464   }
2465 
2466   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2467   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2468   if (idxb) {
2469     ierr = PetscFree(idxb);CHKERRQ(ierr);
2470   }
2471   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 #undef __FUNCT__
2476 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2477 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2478 {
2479   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2480   PetscInt       n      = A->rmap->n;
2481   PetscInt       cstart = A->cmap->rstart;
2482   PetscInt      *cmap   = mat->garray;
2483   PetscInt      *diagIdx, *offdiagIdx;
2484   Vec            diagV, offdiagV;
2485   PetscScalar   *a, *diagA, *offdiagA;
2486   PetscInt       r;
2487   PetscErrorCode ierr;
2488 
2489   PetscFunctionBegin;
2490   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2491   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2492   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2493   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2494   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2495   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2496   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2497   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2498   for(r = 0; r < n; ++r) {
2499     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2500       a[r]   = diagA[r];
2501       idx[r] = cstart + diagIdx[r];
2502     } else {
2503       a[r]   = offdiagA[r];
2504       idx[r] = cmap[offdiagIdx[r]];
2505     }
2506   }
2507   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2508   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2509   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2510   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2511   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2512   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 #undef __FUNCT__
2517 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2518 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2519 {
2520   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2521   PetscInt       n      = A->rmap->n;
2522   PetscInt       cstart = A->cmap->rstart;
2523   PetscInt      *cmap   = mat->garray;
2524   PetscInt      *diagIdx, *offdiagIdx;
2525   Vec            diagV, offdiagV;
2526   PetscScalar   *a, *diagA, *offdiagA;
2527   PetscInt       r;
2528   PetscErrorCode ierr;
2529 
2530   PetscFunctionBegin;
2531   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2532   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2533   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2534   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2535   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2536   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2537   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2538   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2539   for(r = 0; r < n; ++r) {
2540     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2541       a[r]   = diagA[r];
2542       idx[r] = cstart + diagIdx[r];
2543     } else {
2544       a[r]   = offdiagA[r];
2545       idx[r] = cmap[offdiagIdx[r]];
2546     }
2547   }
2548   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2549   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2550   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2551   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2552   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2553   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 
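/*
   Usage sketch (illustrative) for the row-extrema family above: v receives
   one entry per locally owned row, and idx (optional for the Abs variants)
   receives the global column at which each extremum was found; m below is an
   assumed name for the number of local rows.

      Vec      v;
      PetscInt *idx;
      ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
      ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
*/
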
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2559 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2560 {
2561   PetscErrorCode ierr;
2562   Mat            *dummy;
2563 
2564   PetscFunctionBegin;
2565   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2566   *newmat = *dummy;
2567   ierr = PetscFree(dummy);CHKERRQ(ierr);
2568   PetscFunctionReturn(0);
2569 }
2570 
2571 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2572 /* -------------------------------------------------------------------*/
2573 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2574        MatGetRow_MPIAIJ,
2575        MatRestoreRow_MPIAIJ,
2576        MatMult_MPIAIJ,
2577 /* 4*/ MatMultAdd_MPIAIJ,
2578        MatMultTranspose_MPIAIJ,
2579        MatMultTransposeAdd_MPIAIJ,
2580 #ifdef PETSC_HAVE_PBGL
2581        MatSolve_MPIAIJ,
2582 #else
2583        0,
2584 #endif
2585        0,
2586        0,
2587 /*10*/ 0,
2588        0,
2589        0,
2590        MatSOR_MPIAIJ,
2591        MatTranspose_MPIAIJ,
2592 /*15*/ MatGetInfo_MPIAIJ,
2593        MatEqual_MPIAIJ,
2594        MatGetDiagonal_MPIAIJ,
2595        MatDiagonalScale_MPIAIJ,
2596        MatNorm_MPIAIJ,
2597 /*20*/ MatAssemblyBegin_MPIAIJ,
2598        MatAssemblyEnd_MPIAIJ,
2599        MatSetOption_MPIAIJ,
2600        MatZeroEntries_MPIAIJ,
2601 /*24*/ MatZeroRows_MPIAIJ,
2602        0,
2603 #ifdef PETSC_HAVE_PBGL
2604        0,
2605 #else
2606        0,
2607 #endif
2608        0,
2609        0,
2610 /*29*/ MatSetUpPreallocation_MPIAIJ,
2611 #ifdef PETSC_HAVE_PBGL
2612        0,
2613 #else
2614        0,
2615 #endif
2616        0,
2617        0,
2618        0,
2619 /*34*/ MatDuplicate_MPIAIJ,
2620        0,
2621        0,
2622        0,
2623        0,
2624 /*39*/ MatAXPY_MPIAIJ,
2625        MatGetSubMatrices_MPIAIJ,
2626        MatIncreaseOverlap_MPIAIJ,
2627        MatGetValues_MPIAIJ,
2628        MatCopy_MPIAIJ,
2629 /*44*/ MatGetRowMax_MPIAIJ,
2630        MatScale_MPIAIJ,
2631        0,
2632        0,
2633        0,
2634 /*49*/ MatSetBlockSize_MPIAIJ,
2635        0,
2636        0,
2637        0,
2638        0,
2639 /*54*/ MatFDColoringCreate_MPIAIJ,
2640        0,
2641        MatSetUnfactored_MPIAIJ,
2642        MatPermute_MPIAIJ,
2643        0,
2644 /*59*/ MatGetSubMatrix_MPIAIJ,
2645        MatDestroy_MPIAIJ,
2646        MatView_MPIAIJ,
2647        0,
2648        0,
2649 /*64*/ 0,
2650        0,
2651        0,
2652        0,
2653        0,
2654 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2655        MatGetRowMinAbs_MPIAIJ,
2656        0,
2657        MatSetColoring_MPIAIJ,
2658 #if defined(PETSC_HAVE_ADIC)
2659        MatSetValuesAdic_MPIAIJ,
2660 #else
2661        0,
2662 #endif
2663        MatSetValuesAdifor_MPIAIJ,
2664 /*75*/ MatFDColoringApply_AIJ,
2665        0,
2666        0,
2667        0,
2668        0,
2669 /*80*/ 0,
2670        0,
2671        0,
2672 /*83*/ MatLoad_MPIAIJ,
2673        0,
2674        0,
2675        0,
2676        0,
2677        0,
2678 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2679        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2680        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2681        MatPtAP_Basic,
2682        MatPtAPSymbolic_MPIAIJ,
2683 /*94*/ MatPtAPNumeric_MPIAIJ,
2684        0,
2685        0,
2686        0,
2687        0,
2688 /*99*/ 0,
2689        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2690        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2691        MatConjugate_MPIAIJ,
2692        0,
2693 /*104*/MatSetValuesRow_MPIAIJ,
2694        MatRealPart_MPIAIJ,
2695        MatImaginaryPart_MPIAIJ,
2696        0,
2697        0,
2698 /*109*/0,
2699        MatGetRedundantMatrix_MPIAIJ,
2700        MatGetRowMin_MPIAIJ,
2701        0,
2702        0,
2703 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2704        0,
2705        0,
2706        0,
2707        0,
2708        0
2709 };
2710 
2711 /* ----------------------------------------------------------------------------------------*/
2712 
2713 EXTERN_C_BEGIN
2714 #undef __FUNCT__
2715 #define __FUNCT__ "MatStoreValues_MPIAIJ"
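/*
    Stores a copy of the current numerical values of the parallel matrix by forwarding
    to MatStoreValues() on the diagonal (A) and off-diagonal (B) SeqAIJ blocks; the
    saved values can be put back with MatRetrieveValues_MPIAIJ() below.
*/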
2716 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2717 {
2718   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2719   PetscErrorCode ierr;
2720 
2721   PetscFunctionBegin;
2722   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2723   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2724   PetscFunctionReturn(0);
2725 }
2726 EXTERN_C_END
2727 
2728 EXTERN_C_BEGIN
2729 #undef __FUNCT__
2730 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2731 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2732 {
2733   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2734   PetscErrorCode ierr;
2735 
2736   PetscFunctionBegin;
2737   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2738   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2739   PetscFunctionReturn(0);
2740 }
2741 EXTERN_C_END
2742 
2743 #include "petscpc.h"
2744 EXTERN_C_BEGIN
2745 #undef __FUNCT__
2746 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
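/*
    Preallocates the two sequential blocks of the MPIAIJ matrix: b->A is the square
    "diagonal" block (local rows by local columns) and b->B the "off-diagonal" block,
    created here with the full global column width (its columns are later compacted
    to just the needed ghost columns when the matrix is assembled).
*/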
2747 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2748 {
2749   Mat_MPIAIJ     *b;
2750   PetscErrorCode ierr;
2751   PetscInt       i;
2752 
2753   PetscFunctionBegin;
2754   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2755   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2756   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2757   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2758 
2759   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2760   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2761   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2762   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2763   if (d_nnz) {
2764     for (i=0; i<B->rmap->n; i++) {
2765       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2766     }
2767   }
2768   if (o_nnz) {
2769     for (i=0; i<B->rmap->n; i++) {
2770       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2771     }
2772   }
2773   b = (Mat_MPIAIJ*)B->data;
2774 
2775   if (!B->preallocated) {
2776     /* Explicitly create 2 MATSEQAIJ matrices. */
2777     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2778     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2779     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2780     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2781     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2782     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2783     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2784     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2785   }
2786 
2787   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2788   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2789   B->preallocated = PETSC_TRUE;
2790   PetscFunctionReturn(0);
2791 }
2792 EXTERN_C_END
2793 
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2796 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2797 {
2798   Mat            mat;
2799   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2800   PetscErrorCode ierr;
2801 
2802   PetscFunctionBegin;
2803   *newmat       = 0;
2804   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2805   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2806   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2807   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2808   a    = (Mat_MPIAIJ*)mat->data;
2809 
  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;
2815 
2816   a->size           = oldmat->size;
2817   a->rank           = oldmat->rank;
2818   a->donotstash     = oldmat->donotstash;
2819   a->roworiented    = oldmat->roworiented;
2820   a->rowindices     = 0;
2821   a->rowvalues      = 0;
2822   a->getrowactive   = PETSC_FALSE;
2823 
2824   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2825   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2826 
2827   if (oldmat->colmap) {
2828 #if defined (PETSC_USE_CTABLE)
2829     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2830 #else
2831     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2832     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2833     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2834 #endif
2835   } else a->colmap = 0;
2836   if (oldmat->garray) {
2837     PetscInt len;
2838     len  = oldmat->B->cmap->n;
2839     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2840     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2841     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2842   } else a->garray = 0;
2843 
2844   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2845   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2846   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2847   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2848   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2849   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2850   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2851   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2852   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2853   *newmat = mat;
2854   PetscFunctionReturn(0);
2855 }
2856 
2857 #undef __FUNCT__
2858 #define __FUNCT__ "MatLoad_MPIAIJ"
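/*
    Reads the matrix from a PETSc binary viewer. The binary format is
    header[4] = {MAT_FILE_CLASSID, M, N, total nz}, followed by the M row lengths,
    the nz column indices, and the nz numerical values. Process 0 reads the file
    and ships each process its own block of rows.
*/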
2859 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2860 {
2861   Mat            A;
2862   PetscScalar    *vals,*svals;
2863   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2864   MPI_Status     status;
2865   PetscErrorCode ierr;
2866   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2867   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2868   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2869   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2870   PetscInt       cend,cstart,n,*rowners;
2871   int            fd;
2872 
2873   PetscFunctionBegin;
2874   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2875   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2876   if (!rank) {
2877     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2878     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2879     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2880   }
2881 
2882   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2883   M = header[1]; N = header[2];
2884   /* determine ownership of all rows */
2885   m    = M/size + ((M % size) > rank);
2886   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2887   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2888 
2889   /* First process needs enough room for process with most rows */
2890   if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
2893       mmax = PetscMax(mmax,rowners[i]);
2894     }
2895   } else mmax = m;
2896 
2897   rowners[0] = 0;
2898   for (i=2; i<=size; i++) {
2899     rowners[i] += rowners[i-1];
2900   }
2901   rstart = rowners[rank];
2902   rend   = rowners[rank+1];
2903 
2904   /* distribute row lengths to all processors */
2905   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2906   if (!rank) {
2907     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2908     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2909     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2910     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2911     for (j=0; j<m; j++) {
2912       procsnz[0] += ourlens[j];
2913     }
2914     for (i=1; i<size; i++) {
2915       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2916       /* calculate the number of nonzeros on each processor */
2917       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2918         procsnz[i] += rowlengths[j];
2919       }
2920       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2921       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2922     }
2923     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2924   } else {
    mpicnt = PetscMPIIntCast(m);
2926     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2927   }
2928 
2929   if (!rank) {
2930     /* determine max buffer needed and allocate it */
2931     maxnz = 0;
2932     for (i=0; i<size; i++) {
2933       maxnz = PetscMax(maxnz,procsnz[i]);
2934     }
2935     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2936 
2937     /* read in my part of the matrix column indices  */
2938     nz   = procsnz[0];
2939     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2940     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2941 
    /* read in everyone else's and ship off */
2943     for (i=1; i<size; i++) {
2944       nz     = procsnz[i];
2945       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2946       mpicnt = PetscMPIIntCast(nz);
2947       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2948     }
2949     ierr = PetscFree(cols);CHKERRQ(ierr);
2950   } else {
2951     /* determine buffer space needed for message */
2952     nz = 0;
2953     for (i=0; i<m; i++) {
2954       nz += ourlens[i];
2955     }
2956     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2957 
    /* receive message of column indices */
    mpicnt = PetscMPIIntCast(nz);
2960     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2961     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2962     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
2963     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
2964     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
2965   }
2966 
2967   /* determine column ownership if matrix is not square */
2968   if (N != M) {
2969     n      = N/size + ((N % size) > rank);
2970     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2971     cstart = cend - n;
2972   } else {
2973     cstart = rstart;
2974     cend   = rend;
2975     n      = cend - cstart;
2976   }
2977 
2978   /* loop over local rows, determining number of off diagonal entries */
2979   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2980   jj = 0;
2981   for (i=0; i<m; i++) {
2982     for (j=0; j<ourlens[i]; j++) {
2983       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2984       jj++;
2985     }
2986   }
2987 
2988   /* create our matrix */
2989   for (i=0; i<m; i++) {
2990     ourlens[i] -= offlens[i];
2991   }
2992   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2993   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2994   ierr = MatSetType(A,type);CHKERRQ(ierr);
2995   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2996 
2997   for (i=0; i<m; i++) {
2998     ourlens[i] += offlens[i];
2999   }
3000 
3001   if (!rank) {
3002     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3003 
3004     /* read in my part of the matrix numerical values  */
3005     nz   = procsnz[0];
3006     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3007 
3008     /* insert into matrix */
3009     jj      = rstart;
3010     smycols = mycols;
3011     svals   = vals;
3012     for (i=0; i<m; i++) {
3013       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3014       smycols += ourlens[i];
3015       svals   += ourlens[i];
3016       jj++;
3017     }
3018 
3019     /* read in other processors and ship out */
3020     for (i=1; i<size; i++) {
3021       nz     = procsnz[i];
3022       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3023       mpicnt = PetscMPIIntCast(nz);
3024       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3025     }
3026     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3027   } else {
3028     /* receive numeric values */
3029     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3030 
    /* receive message of values */
3032     mpicnt = PetscMPIIntCast(nz);
3033     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3034     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3035     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
3036     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
3037     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
3038 
3039     /* insert into matrix */
3040     jj      = rstart;
3041     smycols = mycols;
3042     svals   = vals;
3043     for (i=0; i<m; i++) {
3044       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3045       smycols += ourlens[i];
3046       svals   += ourlens[i];
3047       jj++;
3048     }
3049   }
3050   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3051   ierr = PetscFree(vals);CHKERRQ(ierr);
3052   ierr = PetscFree(mycols);CHKERRQ(ierr);
3053   ierr = PetscFree(rowners);CHKERRQ(ierr);
3054 
3055   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3056   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3057   *newmat = A;
3058   PetscFunctionReturn(0);
3059 }
3060 
3061 #undef __FUNCT__
3062 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3063 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3064 {
3065   PetscErrorCode ierr;
3066   IS             iscol_local;
3067   PetscInt       csize;
3068 
3069   PetscFunctionBegin;
3070   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3071   if (call == MAT_REUSE_MATRIX) {
3072     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3073     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3074   } else {
3075     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3076   }
3077   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3078   if (call == MAT_INITIAL_MATRIX) {
3079     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3080     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3081   }
3082   PetscFunctionReturn(0);
3083 }
3084 
3085 #undef __FUNCT__
3086 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
  on each process, and then the final result built by concatenating those
  local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

  Note: This requires a sequential iscol containing all the indices.
*/
3094 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3095 {
3096   PetscErrorCode ierr;
3097   PetscMPIInt    rank,size;
3098   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3099   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3100   Mat            *local,M,Mreuse;
3101   MatScalar      *vwork,*aa;
3102   MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

3106   PetscFunctionBegin;
3107   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3108   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3109 
3110   if (call ==  MAT_REUSE_MATRIX) {
3111     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3112     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3113     local = &Mreuse;
3114     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3115   } else {
3116     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3117     Mreuse = *local;
3118     ierr   = PetscFree(local);CHKERRQ(ierr);
3119   }
3120 
3121   /*
3122       m - number of local rows
3123       n - number of columns (same on all processors)
3124       rstart - first row in new global matrix generated
3125   */
3126   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3127   if (call == MAT_INITIAL_MATRIX) {
3128     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3129     ii  = aij->i;
3130     jj  = aij->j;
3131 
3132     /*
3133         Determine the number of non-zeros in the diagonal and off-diagonal
3134         portions of the matrix in order to do correct preallocation
3135     */
3136 
3137     /* first get start and end of "diagonal" columns */
3138     if (csize == PETSC_DECIDE) {
3139       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3140       if (mglobal == n) { /* square matrix */
3141 	nlocal = m;
3142       } else {
3143         nlocal = n/size + ((n % size) > rank);
3144       }
3145     } else {
3146       nlocal = csize;
3147     }
3148     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3149     rstart = rend - nlocal;
3150     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3151 
3152     /* next, compute all the lengths */
3153     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3154     olens = dlens + m;
3155     for (i=0; i<m; i++) {
3156       jend = ii[i+1] - ii[i];
3157       olen = 0;
3158       dlen = 0;
3159       for (j=0; j<jend; j++) {
3160         if (*jj < rstart || *jj >= rend) olen++;
3161         else dlen++;
3162         jj++;
3163       }
3164       olens[i] = olen;
3165       dlens[i] = dlen;
3166     }
3167     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3168     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3169     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3170     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3171     ierr = PetscFree(dlens);CHKERRQ(ierr);
3172   } else {
3173     PetscInt ml,nl;
3174 
3175     M = *newmat;
3176     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3177     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3178     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3179     /*
3180          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3181        rather than the slower MatSetValues().
3182     */
3183     M->was_assembled = PETSC_TRUE;
3184     M->assembled     = PETSC_FALSE;
3185   }
3186   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3187   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3188   ii  = aij->i;
3189   jj  = aij->j;
3190   aa  = aij->a;
3191   for (i=0; i<m; i++) {
3192     row   = rstart + i;
3193     nz    = ii[i+1] - ii[i];
3194     cwork = jj;     jj += nz;
3195     vwork = aa;     aa += nz;
3196     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3197   }
3198 
3199   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3200   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3201   *newmat = M;
3202 
3203   /* save submatrix used in processor for next request */
3204   if (call ==  MAT_INITIAL_MATRIX) {
3205     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3206     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3207   }
3208 
3209   PetscFunctionReturn(0);
3210 }
3211 
3212 EXTERN_C_BEGIN
3213 #undef __FUNCT__
3214 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3215 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3216 {
3217   PetscInt       m,cstart, cend,j,nnz,i,d;
3218   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3219   const PetscInt *JJ;
3220   PetscScalar    *values;
3221   PetscErrorCode ierr;
3222 
3223   PetscFunctionBegin;
3224   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3225 
3226   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3227   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3228   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3229   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3230   m      = B->rmap->n;
3231   cstart = B->cmap->rstart;
3232   cend   = B->cmap->rend;
3233   rstart = B->rmap->rstart;
3234 
3235   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3236 
#if defined(PETSC_USE_DEBUG)
3238   for (i=0; i<m; i++) {
3239     nnz     = Ii[i+1]- Ii[i];
3240     JJ      = J + Ii[i];
3241     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3244   }
3245 #endif
3246 
3247   for (i=0; i<m; i++) {
3248     nnz     = Ii[i+1]- Ii[i];
3249     JJ      = J + Ii[i];
3250     nnz_max = PetscMax(nnz_max,nnz);
3251     d       = 0;
3252     for (j=0; j<nnz; j++) {
3253       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3254     }
3255     d_nnz[i] = d;
3256     o_nnz[i] = nnz - d;
3257   }
3258   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3259   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3260 
3261   if (v) values = (PetscScalar*)v;
3262   else {
3263     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3264     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3265   }
3266 
3267   for (i=0; i<m; i++) {
3268     ii   = i + rstart;
3269     nnz  = Ii[i+1]- Ii[i];
3270     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3271   }
3272   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3273   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3274 
3275   if (!v) {
3276     ierr = PetscFree(values);CHKERRQ(ierr);
3277   }
3278   PetscFunctionReturn(0);
3279 }
3280 EXTERN_C_END
3281 
3282 #undef __FUNCT__
3283 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3284 /*@
3285    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3286    (the default parallel PETSc format).
3287 
3288    Collective on MPI_Comm
3289 
3290    Input Parameters:
3291 +  B - the matrix
3292 .  i - the indices into j for the start of each local row (starts with zero)
3293 .  j - the column indices for each local row (starts with zero)
3294 -  v - optional values in the matrix
3295 
3296    Level: developer
3297 
3298    Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
3301      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3302 
       The i and j indices are 0 based, and the i indices are offsets into the local j array.
3304 
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
3307     as shown:
3308 
3309         1 0 0
3310         2 0 3     P0
3311        -------
3312         4 5 6     P1
3313 
3314      Process0 [P0]: rows_owned=[0,1]
3315         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
3318 
3319      Process1 [P1]: rows_owned=[2]
3320         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3323 
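   With these arrays in hand, a typical calling sequence is the one used by
   MatCreateMPIAIJWithArrays() below (a sketch; error checking omitted):
.vb
      MatCreate(comm,&B);
      MatSetSizes(B,m,n,M,N);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
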
3324 .keywords: matrix, aij, compressed row, sparse, parallel
3325 
3326 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3327           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3328 @*/
3329 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3330 {
3331   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3332 
3333   PetscFunctionBegin;
3334   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3335   if (f) {
3336     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3337   }
3338   PetscFunctionReturn(0);
3339 }
3340 
3341 #undef __FUNCT__
3342 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3343 /*@C
3344    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3345    (the default parallel PETSc format).  For good matrix assembly performance
3346    the user should preallocate the matrix storage by setting the parameters
3347    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3348    performance can be increased by more than a factor of 50.
3349 
3350    Collective on MPI_Comm
3351 
3352    Input Parameters:
3353 +  A - the matrix
3354 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3355            (same value is used for all local rows)
3356 .  d_nnz - array containing the number of nonzeros in the various rows of the
3357            DIAGONAL portion of the local submatrix (possibly different for each row)
3358            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3359            The size of this array is equal to the number of local rows, i.e 'm'.
3360            You must leave room for the diagonal entry even if it is zero.
3361 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3362            submatrix (same value is used for all local rows).
3363 -  o_nnz - array containing the number of nonzeros in the various rows of the
3364            OFF-DIAGONAL portion of the local submatrix (possibly different for
3365            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3366            structure. The size of this array is equal to the number
3367            of local rows, i.e 'm'.
3368 
3369    If the *_nnz parameter is given then the *_nz parameter is ignored
3370 
3371    The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
3373    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3374 
3375    The parallel matrix is partitioned such that the first m0 rows belong to
3376    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3377    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3378 
3379    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3385 
3386    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3387 
3388    You can call MatGetInfo() to get information on how effective the preallocation was;
3389    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3390    You can also run with the option -info and look for messages with the string
3391    malloc in them to see if additional memory allocation was needed.
3392 
3393    Example usage:
3394 
3395    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3397    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3398    as follows:
3399 
3400 .vb
3401             1  2  0  |  0  3  0  |  0  4
3402     Proc0   0  5  6  |  7  0  0  |  8  0
3403             9  0 10  | 11  0  0  | 12  0
3404     -------------------------------------
3405            13  0 14  | 15 16 17  |  0  0
3406     Proc1   0 18  0  | 19 20 21  |  0  0
3407             0  0  0  | 22 23  0  | 24  0
3408     -------------------------------------
3409     Proc2  25 26 27  |  0  0 28  | 29  0
3410            30  0  0  | 31 32 33  |  0 34
3411 .ve
3412 
3413    This can be represented as a collection of submatrices as:
3414 
3415 .vb
3416       A B C
3417       D E F
3418       G H I
3419 .ve
3420 
3421    Where the submatrices A,B,C are owned by proc0, D,E,F are
3422    owned by proc1, G,H,I are owned by proc2.
3423 
3424    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3425    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3426    The 'M','N' parameters are 8,8, and have the same values on all procs.
3427 
3428    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3429    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3430    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3434 
3435    When d_nz, o_nz parameters are specified, d_nz storage elements are
3436    allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
3449    34 values.
3450 
3451    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3453    In the above case the values for d_nnz,o_nnz are:
3454 .vb
3455      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3456      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3457      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3458 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
3460    hence pre-allocation is perfect.
3461 
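   A call sketch for the example above, where d_nnz/o_nnz are the per-process
   arrays just listed and m,n the local sizes (error checking omitted):
.vb
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,8,8);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
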
3462    Level: intermediate
3463 
3464 .keywords: matrix, aij, compressed row, sparse, parallel
3465 
3466 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3467           MPIAIJ, MatGetInfo()
3468 @*/
3469 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3470 {
3471   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3472 
3473   PetscFunctionBegin;
3474   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3475   if (f) {
3476     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3477   }
3478   PetscFunctionReturn(0);
3479 }
3480 
3481 #undef __FUNCT__
3482 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3483 /*@
3484      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3485          CSR format the local rows.
3486 
3487    Collective on MPI_Comm
3488 
3489    Input Parameters:
3490 +  comm - MPI communicator
3491 .  m - number of local rows (Cannot be PETSC_DECIDE)
3492 .  n - This value should be the same as the local size used in creating the
3493        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3494        calculated if N is given) For square matrices n is almost always m.
3495 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3496 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3497 .   i - row indices
3498 .   j - column indices
3499 -   a - matrix values
3500 
3501    Output Parameter:
3502 .   mat - the matrix
3503 
3504    Level: intermediate
3505 
3506    Notes:
3507        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3508      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3509      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3510 
       The i and j indices are 0 based, and the i indices are offsets into the local j array.
3512 
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
3515     as shown:
3516 
3517         1 0 0
3518         2 0 3     P0
3519        -------
3520         4 5 6     P1
3521 
3522      Process0 [P0]: rows_owned=[0,1]
3523         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
3526 
3527      Process1 [P1]: rows_owned=[2]
3528         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3531 
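   With these per-process arrays the matrix above can be created in one call
   (a sketch; nrow is 2 on P0 and 1 on P1):
.vb
      MatCreateMPIAIJWithArrays(comm,nrow,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
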
3532 .keywords: matrix, aij, compressed row, sparse, parallel
3533 
3534 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3535           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3536 @*/
3537 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3538 {
3539   PetscErrorCode ierr;
3540 
  PetscFunctionBegin;
3542   if (i[0]) {
3543     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3544   }
3545   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3546   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3547   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3548   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3549   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3550   PetscFunctionReturn(0);
3551 }
3552 
3553 #undef __FUNCT__
3554 #define __FUNCT__ "MatCreateMPIAIJ"
3555 /*@C
3556    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3557    (the default parallel PETSc format).  For good matrix assembly performance
3558    the user should preallocate the matrix storage by setting the parameters
3559    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3560    performance can be increased by more than a factor of 50.
3561 
3562    Collective on MPI_Comm
3563 
3564    Input Parameters:
3565 +  comm - MPI communicator
3566 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3567            This value should be the same as the local size used in creating the
3568            y vector for the matrix-vector product y = Ax.
3569 .  n - This value should be the same as the local size used in creating the
3570        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3571        calculated if N is given) For square matrices n is almost always m.
3572 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3573 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3574 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3575            (same value is used for all local rows)
3576 .  d_nnz - array containing the number of nonzeros in the various rows of the
3577            DIAGONAL portion of the local submatrix (possibly different for each row)
3578            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3579            The size of this array is equal to the number of local rows, i.e 'm'.
3580            You must leave room for the diagonal entry even if it is zero.
3581 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3582            submatrix (same value is used for all local rows).
3583 -  o_nnz - array containing the number of nonzeros in the various rows of the
3584            OFF-DIAGONAL portion of the local submatrix (possibly different for
3585            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3586            structure. The size of this array is equal to the number
3587            of local rows, i.e 'm'.
3588 
3589    Output Parameter:
3590 .  A - the matrix
3591 
3592    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3594    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3595 
3596    Notes:
3597    If the *_nnz parameter is given then the *_nz parameter is ignored
3598 
3599    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3600    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3601    storage requirements for this matrix.
3602 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
3605    that argument.
3606 
3607    The user MUST specify either the local or global matrix dimensions
3608    (possibly both).
3609 
3610    The parallel matrix is partitioned across processors such that the
3611    first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.
3615 
3616    The columns are logically partitioned with the n0 columns belonging
3617    to 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.
3619 
3620    The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix formed by the rows and columns m,n owned by
   the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
3626    illustrates this concept.
3627 
3628    For a square global matrix we define each processor's diagonal portion
3629    to be its local rows and the corresponding columns (a square submatrix);
3630    each processor's off-diagonal portion encompasses the remainder of the
3631    local matrix (a rectangular submatrix).
3632 
3633    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3634 
3635    When calling this routine with a single process communicator, a matrix of
3636    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
.vb
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve
3639 
3640    By default, this format uses inodes (identical nodes) when possible.
3641    We search for consecutive rows with the same nonzero structure, thereby
3642    reusing matrix information to achieve increased efficiency.
3643 
3644    Options Database Keys:
3645 +  -mat_no_inode  - Do not use inodes
3646 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3647 -  -mat_aij_oneindex - Internally use indexing starting at 1
3648         rather than 0.  Note that when calling MatSetValues(),
3649         the user still MUST index entries starting at 0!
3650 
3651 
3652    Example usage:
3653 
3654    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3656    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3657    as follows:
3658 
3659 .vb
3660             1  2  0  |  0  3  0  |  0  4
3661     Proc0   0  5  6  |  7  0  0  |  8  0
3662             9  0 10  | 11  0  0  | 12  0
3663     -------------------------------------
3664            13  0 14  | 15 16 17  |  0  0
3665     Proc1   0 18  0  | 19 20 21  |  0  0
3666             0  0  0  | 22 23  0  | 24  0
3667     -------------------------------------
3668     Proc2  25 26 27  |  0  0 28  | 29  0
3669            30  0  0  | 31 32 33  |  0 34
3670 .ve
3671 
3672    This can be represented as a collection of submatrices as:
3673 
3674 .vb
3675       A B C
3676       D E F
3677       G H I
3678 .ve
3679 
3680    Where the submatrices A,B,C are owned by proc0, D,E,F are
3681    owned by proc1, G,H,I are owned by proc2.
3682 
3683    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3684    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3685    The 'M','N' parameters are 8,8, and have the same values on all procs.
3686 
3687    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3688    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3689    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3693 
3694    When d_nz, o_nz parameters are specified, d_nz storage elements are
3695    allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
3708    34 values.
3709 
3710    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3712    In the above case the values for d_nnz,o_nnz are:
3713 .vb
3714      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3715      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3716      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3717 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
3719    hence pre-allocation is perfect.
3720 
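   A single call creating the matrix above, with d_nnz/o_nnz the per-process
   arrays just listed (a sketch):
.vb
      MatCreateMPIAIJ(comm,m,n,8,8,0,d_nnz,0,o_nnz,&A);
.ve
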
3721    Level: intermediate
3722 
3723 .keywords: matrix, aij, compressed row, sparse, parallel
3724 
3725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3726           MPIAIJ, MatCreateMPIAIJWithArrays()
3727 @*/
3728 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3729 {
3730   PetscErrorCode ierr;
3731   PetscMPIInt    size;
3732 
3733   PetscFunctionBegin;
3734   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3735   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3736   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3737   if (size > 1) {
3738     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3739     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3740   } else {
3741     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3742     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3743   }
3744   PetscFunctionReturn(0);
3745 }
3746 
3747 #undef __FUNCT__
3748 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
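/*
    Returns the diagonal block Ad, the off-diagonal block Ao, and the colmap array
    (a->garray) mapping the local column numbering of Ao to global column indices.
*/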
3749 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3750 {
3751   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3752 
3753   PetscFunctionBegin;
3754   *Ad     = a->A;
3755   *Ao     = a->B;
3756   *colmap = a->garray;
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3762 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3763 {
3764   PetscErrorCode ierr;
3765   PetscInt       i;
3766   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3767 
3768   PetscFunctionBegin;
3769   if (coloring->ctype == IS_COLORING_GLOBAL) {
3770     ISColoringValue *allcolors,*colors;
3771     ISColoring      ocoloring;
3772 
3773     /* set coloring for diagonal portion */
3774     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3775 
3776     /* set coloring for off-diagonal portion */
3777     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3778     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3779     for (i=0; i<a->B->cmap->n; i++) {
3780       colors[i] = allcolors[a->garray[i]];
3781     }
3782     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3783     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3784     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3785     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3786   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3787     ISColoringValue *colors;
3788     PetscInt        *larray;
3789     ISColoring      ocoloring;
3790 
3791     /* set coloring for diagonal portion */
3792     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3793     for (i=0; i<a->A->cmap->n; i++) {
3794       larray[i] = i + A->cmap->rstart;
3795     }
3796     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3797     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3798     for (i=0; i<a->A->cmap->n; i++) {
3799       colors[i] = coloring->colors[larray[i]];
3800     }
3801     ierr = PetscFree(larray);CHKERRQ(ierr);
3802     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3803     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3804     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3805 
3806     /* set coloring for off-diagonal portion */
3807     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3808     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3809     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3810     for (i=0; i<a->B->cmap->n; i++) {
3811       colors[i] = coloring->colors[larray[i]];
3812     }
3813     ierr = PetscFree(larray);CHKERRQ(ierr);
3814     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3815     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3816     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3817   } else {
3818     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3819   }
3820 
3821   PetscFunctionReturn(0);
3822 }
3823 
3824 #if defined(PETSC_HAVE_ADIC)
3825 #undef __FUNCT__
3826 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3827 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3828 {
3829   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3830   PetscErrorCode ierr;
3831 
3832   PetscFunctionBegin;
3833   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3834   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3835   PetscFunctionReturn(0);
3836 }
3837 #endif
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3841 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3842 {
3843   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3844   PetscErrorCode ierr;
3845 
3846   PetscFunctionBegin;
3847   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3848   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3849   PetscFunctionReturn(0);
3850 }
3851 
3852 #undef __FUNCT__
3853 #define __FUNCT__ "MatMerge"
3854 /*@
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor
3857 
3858     Collective on MPI_Comm
3859 
3860    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix on each processor
3863 .    n - number of local columns (or PETSC_DECIDE)
3864 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3865 
3866    Output Parameter:
3867 .    outmat - the parallel matrix generated
3868 
3869     Level: advanced
3870 
   Notes: The number of columns of the matrix on EACH processor MUST be the same.
          The input matrix inmat is destroyed once its entries have been merged into outmat.
3872 
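   A typical call, where each process passes its own sequential matrix (a sketch):
.vb
      MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve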
3873 @*/
3874 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3875 {
3876   PetscErrorCode ierr;
3877   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3878   PetscInt       *indx;
3879   PetscScalar    *values;
3880 
3881   PetscFunctionBegin;
3882   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3883   if (scall == MAT_INITIAL_MATRIX){
3884     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3885     if (n == PETSC_DECIDE){
3886       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3887     }
3888     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3889     rstart -= m;
3890 
3891     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3892     for (i=0;i<m;i++) {
3893       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3894       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3895       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3896     }
3897     /* This routine will ONLY return MPIAIJ type matrix */
3898     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3899     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3900     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3901     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3902     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3903 
3904   } else if (scall == MAT_REUSE_MATRIX){
3905     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3906   } else {
3907     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3908   }
3909 
3910   for (i=0;i<m;i++) {
3911     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3912     Ii    = i + rstart;
3913     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3914     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3915   }
3916   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3917   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3918   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3919 
3920   PetscFunctionReturn(0);
3921 }
3922 
3923 #undef __FUNCT__
3924 #define __FUNCT__ "MatFileSplit"
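/*
    Writes each process's local rows of A, as a sequential matrix over the full
    global column space, to the binary file <outfile>.<rank> (one file per process).
*/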
3925 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3926 {
3927   PetscErrorCode    ierr;
3928   PetscMPIInt       rank;
3929   PetscInt          m,N,i,rstart,nnz;
3930   size_t            len;
3931   const PetscInt    *indx;
3932   PetscViewer       out;
3933   char              *name;
3934   Mat               B;
3935   const PetscScalar *values;
3936 
3937   PetscFunctionBegin;
3938   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3939   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3940   /* Should this be the type of the diagonal block of A? */
3941   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3942   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3943   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3944   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3945   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3946   for (i=0;i<m;i++) {
3947     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3948     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3949     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3950   }
3951   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3952   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3953 
3954   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3955   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+12)*sizeof(char),&name);CHKERRQ(ierr); /* room for ".", the rank digits, and the terminating null */
  sprintf(name,"%s.%d",outfile,rank);
3958   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
3960   ierr = MatView(B,out);CHKERRQ(ierr);
3961   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3962   ierr = MatDestroy(B);CHKERRQ(ierr);
3963   PetscFunctionReturn(0);
3964 }
3965 
3966 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3967 #undef __FUNCT__
3968 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3969 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3970 {
3971   PetscErrorCode       ierr;
3972   Mat_Merge_SeqsToMPI  *merge;
3973   PetscContainer       container;
3974 
3975   PetscFunctionBegin;
3976   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3977   if (container) {
3978     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3979     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3980     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3981     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3982     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3983     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3984     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3985     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3986     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3987     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3988     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3989     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3990     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3991     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
3992 
3993     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3994     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3995   }
3996   ierr = PetscFree(merge);CHKERRQ(ierr);
3997 
3998   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3999   PetscFunctionReturn(0);
4000 }
4001 
4002 #include "../src/mat/utils/freespace.h"
4003 #include "petscbt.h"
4004 
4005 #undef __FUNCT__
4006 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4007 /*@C
4008       MatMerge_SeqsToMPINumeric - Fills an MPIAIJ matrix, created by
4009                  MatMerge_SeqsToMPISymbolic(), with the numerical values of the
4010                  sequential matrix owned by each process
4011 
4012     Collective on Mat
4013 
4014    Input Parameters:
4015 +    seqmat - the input sequential matrix
4016 -    mpimat - the parallel matrix whose symbolic structure was computed by
4017              MatMerge_SeqsToMPISymbolic()
4018 
4019     Level: advanced
4020 
4021    Notes:
4022      The dimensions of the sequential matrix MUST be the same on every process.
4023      The supporting data structure created by the symbolic phase is attached to
4024      mpimat in the container "MatMergeSeqsToMPI" and is freed when mpimat is
4025      destroyed; call PetscObjectQuery() to access it.
4029 @*/
4030 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4031 {
4032   PetscErrorCode       ierr;
4033   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4034   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4035   PetscMPIInt          size,rank,taga,*len_s;
4036   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4037   PetscInt             proc,m;
4038   PetscInt             **buf_ri,**buf_rj;
4039   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4040   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4041   MPI_Request          *s_waits,*r_waits;
4042   MPI_Status           *status;
4043   MatScalar            *aa=a->a;
4044   MatScalar            **abuf_r,*ba_i;
4045   Mat_Merge_SeqsToMPI  *merge;
4046   PetscContainer       container;
4047 
4048   PetscFunctionBegin;
4049   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4050 
4051   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4052   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4053 
4054   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4055   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()");
4056   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4058   bi     = merge->bi;
4059   bj     = merge->bj;
4060   buf_ri = merge->buf_ri;
4061   buf_rj = merge->buf_rj;
4062 
4063   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4064   owners = merge->rowmap->range;
4065   len_s  = merge->len_s;
4066 
4067   /* send and recv matrix values */
4068   /*-----------------------------*/
4069   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4070   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4071 
4072   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4073   for (proc=0,k=0; proc<size; proc++){
4074     if (!len_s[proc]) continue;
4075     i = owners[proc];
4076     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4077     k++;
4078   }
4079 
4080   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4081   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4082   ierr = PetscFree(status);CHKERRQ(ierr);
4083 
4084   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4085   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4086 
4087   /* insert mat values of mpimat */
4088   /*----------------------------*/
4089   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); /* ba_i is declared MatScalar */
4090   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4091 
4092   for (k=0; k<merge->nrecv; k++){
4093     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4094     nrows = *(buf_ri_k[k]);
4095     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4096     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4097   }
4098 
4099   /* set values of ba */
4100   m = merge->rowmap->n;
4101   for (i=0; i<m; i++) {
4102     arow = owners[rank] + i;
4103     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4104     bnzi = bi[i+1] - bi[i];
4105     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
4106 
4107     /* add local non-zero vals of this proc's seqmat into ba */
4108     anzi = ai[arow+1] - ai[arow];
4109     aj   = a->j + ai[arow];
4110     aa   = a->a + ai[arow];
4111     nextaj = 0;
4112     for (j=0; nextaj<anzi; j++){
4113       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4114         ba_i[j] += aa[nextaj++];
4115       }
4116     }
4117 
4118     /* add received vals into ba */
4119     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4120       /* i-th row */
4121       if (i == *nextrow[k]) {
4122         anzi = *(nextai[k]+1) - *nextai[k];
4123         aj   = buf_rj[k] + *(nextai[k]);
4124         aa   = abuf_r[k] + *(nextai[k]);
4125         nextaj = 0;
4126         for (j=0; nextaj<anzi; j++){
4127           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4128             ba_i[j] += aa[nextaj++];
4129           }
4130         }
4131         nextrow[k]++; nextai[k]++;
4132       }
4133     }
4134     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4135   }
4136   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4137   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4138 
4139   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4140   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4141   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4142   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4143   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4144   PetscFunctionReturn(0);
4145 }
4146 
4147 #undef __FUNCT__
4148 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4149 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4150 {
4151   PetscErrorCode       ierr;
4152   Mat                  B_mpi;
4153   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4154   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4155   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4156   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4157   PetscInt             len,proc,*dnz,*onz;
4158   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4159   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4160   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4161   MPI_Status           *status;
4162   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4163   PetscBT              lnkbt;
4164   Mat_Merge_SeqsToMPI  *merge;
4165   PetscContainer       container;
4166 
4167   PetscFunctionBegin;
4168   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4169 
4170   /* make sure it is a PETSc comm */
4171   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4172   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4173   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4174 
4175   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4176   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4177 
4178   /* determine row ownership */
4179   /*---------------------------------------------------------*/
4180   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4181   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4182   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4183   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4184   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4185   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4186   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4187 
4188   m      = merge->rowmap->n;
4189   M      = merge->rowmap->N;
4190   owners = merge->rowmap->range;
4191 
4192   /* determine the number of messages to send, their lengths */
4193   /*---------------------------------------------------------*/
4194   len_s  = merge->len_s;
4195 
4196   len = 0;  /* length of buf_si[] */
4197   merge->nsend = 0;
4198   for (proc=0; proc<size; proc++){
4199     len_si[proc] = 0;
4200     if (proc == rank){
4201       len_s[proc] = 0;
4202     } else {
4203       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4204       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4205     }
4206     if (len_s[proc]) {
4207       merge->nsend++;
4208       nrows = 0;
4209       for (i=owners[proc]; i<owners[proc+1]; i++){
4210         if (ai[i+1] > ai[i]) nrows++;
4211       }
4212       len_si[proc] = 2*(nrows+1);
4213       len += len_si[proc];
4214     }
4215   }
4216 
4217   /* determine the number and length of messages to receive for ij-structure */
4218   /*-------------------------------------------------------------------------*/
4219   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4220   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4221 
4222   /* post the Irecv of j-structure */
4223   /*-------------------------------*/
4224   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4225   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4226 
4227   /* post the Isend of j-structure */
4228   /*--------------------------------*/
4229   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4230 
4231   for (proc=0, k=0; proc<size; proc++){
4232     if (!len_s[proc]) continue;
4233     i = owners[proc];
4234     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4235     k++;
4236   }
4237 
4238   /* receives and sends of j-structure are complete */
4239   /*------------------------------------------------*/
4240   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4241   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4242 
4243   /* send and recv i-structure */
4244   /*---------------------------*/
4245   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4246   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4247 
4248   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4249   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4250   for (proc=0,k=0; proc<size; proc++){
4251     if (!len_s[proc]) continue;
4252     /* form outgoing message for i-structure:
4253          buf_si[0]:                 nrows to be sent
4254                [1:nrows]:           row index (global)
4255                [nrows+1:2*nrows+1]: i-structure index
4256     */
4257     /*-------------------------------------------*/
4258     nrows = len_si[proc]/2 - 1;
4259     buf_si_i    = buf_si + nrows+1;
4260     buf_si[0]   = nrows;
4261     buf_si_i[0] = 0;
4262     nrows = 0;
4263     for (i=owners[proc]; i<owners[proc+1]; i++){
4264       anzi = ai[i+1] - ai[i];
4265       if (anzi) {
4266         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4267         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4268         nrows++;
4269       }
4270     }
4271     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4272     k++;
4273     buf_si += len_si[proc];
4274   }
4275 
4276   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4277   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4278 
4279   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4280   for (i=0; i<merge->nrecv; i++){
4281     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4282   }
4283 
4284   ierr = PetscFree(len_si);CHKERRQ(ierr);
4285   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4286   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4287   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4288   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4289   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4290   ierr = PetscFree(status);CHKERRQ(ierr);
4291 
4292   /* compute a local seq matrix in each processor */
4293   /*----------------------------------------------*/
4294   /* allocate bi array and free space for accumulating nonzero column info */
4295   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4296   bi[0] = 0;
4297 
4298   /* create and initialize a linked list */
4299   nlnk = N+1;
4300   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4301 
4302   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4303   len  = ai[owners[rank+1]] - ai[owners[rank]];
4305   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4306   current_space = free_space;
4307 
4308   /* determine symbolic info for each local row */
4309   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4310 
4311   for (k=0; k<merge->nrecv; k++){
4312     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4313     nrows = *buf_ri_k[k];
4314     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4315     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4316   }
4317 
4318   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4319   len = 0;
4320   for (i=0;i<m;i++) {
4321     bnzi   = 0;
4322     /* add local non-zero cols of this proc's seqmat into lnk */
4323     arow   = owners[rank] + i;
4324     anzi   = ai[arow+1] - ai[arow];
4325     aj     = a->j + ai[arow];
4326     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4327     bnzi += nlnk;
4328     /* add received col data into lnk */
4329     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4330       if (i == *nextrow[k]) { /* i-th row */
4331         anzi = *(nextai[k]+1) - *nextai[k];
4332         aj   = buf_rj[k] + *nextai[k];
4333         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4334         bnzi += nlnk;
4335         nextrow[k]++; nextai[k]++;
4336       }
4337     }
4338     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4339 
4340     /* if free space is not available, make more free space */
4341     if (current_space->local_remaining<bnzi) {
4342       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4343       nspacedouble++;
4344     }
4345     /* copy data into free space, then initialize lnk */
4346     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4347     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4348 
4349     current_space->array           += bnzi;
4350     current_space->local_used      += bnzi;
4351     current_space->local_remaining -= bnzi;
4352 
4353     bi[i+1] = bi[i] + bnzi;
4354   }
4355 
4356   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4357 
4358   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4359   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4360   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4361 
4362   /* create symbolic parallel matrix B_mpi */
4363   /*---------------------------------------*/
4364   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4365   if (n==PETSC_DECIDE) {
4366     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4367   } else {
4368     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4369   }
4370   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4371   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4372   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4373 
4374   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4375   B_mpi->assembled     = PETSC_FALSE;
4376   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4377   merge->bi            = bi;
4378   merge->bj            = bj;
4379   merge->buf_ri        = buf_ri;
4380   merge->buf_rj        = buf_rj;
4381   merge->coi           = PETSC_NULL;
4382   merge->coj           = PETSC_NULL;
4383   merge->owners_co     = PETSC_NULL;
4384 
4385   /* attach the supporting struct to B_mpi for reuse */
4386   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4387   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4388   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4389   *mpimat = B_mpi;
4390 
4391   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4392   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4393   PetscFunctionReturn(0);
4394 }
4395 
4396 #undef __FUNCT__
4397 #define __FUNCT__ "MatMerge_SeqsToMPI"
4398 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4399 {
4400   PetscErrorCode   ierr;
4401 
4402   PetscFunctionBegin;
4403   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4404   if (scall == MAT_INITIAL_MATRIX){
4405     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4406   }
4407   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4408   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4409   PetscFunctionReturn(0);
4410 }
4411 
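/*
   A minimal usage sketch for MatMerge_SeqsToMPI() (assuming every process holds
   an assembled SeqAIJ matrix seqmat with identical global dimensions; the
   second call reuses the symbolic structure when only the numerical values of
   seqmat have changed):

     Mat mpimat;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
     ierr = MatDestroy(mpimat);CHKERRQ(ierr);
*/
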
4412 #undef __FUNCT__
4413 #define __FUNCT__ "MatGetLocalMat"
4414 /*@
4415      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A
4416 
4417     Not Collective
4418 
4419    Input Parameters:
4420 +    A - the matrix
4421 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4422 
4423    Output Parameter:
4424 .    A_loc - the local sequential matrix generated
4425 
4426     Level: developer
4427 
4428 @*/
4429 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4430 {
4431   PetscErrorCode  ierr;
4432   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4433   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4434   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4435   MatScalar       *aa=a->a,*ba=b->a,*cam;
4436   PetscScalar     *ca;
4437   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4438   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4439 
4440   PetscFunctionBegin;
4441   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4442   if (scall == MAT_INITIAL_MATRIX){
4443     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4444     ci[0] = 0;
4445     for (i=0; i<am; i++){
4446       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4447     }
4448     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4449     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4450     k = 0;
4451     for (i=0; i<am; i++) {
4452       ncols_o = bi[i+1] - bi[i];
4453       ncols_d = ai[i+1] - ai[i];
4454       /* off-diagonal portion of A */
4455       for (jo=0; jo<ncols_o; jo++) {
4456         col = cmap[*bj];
4457         if (col >= cstart) break;
4458         cj[k]   = col; bj++;
4459         ca[k++] = *ba++;
4460       }
4461       /* diagonal portion of A */
4462       for (j=0; j<ncols_d; j++) {
4463         cj[k]   = cstart + *aj++;
4464         ca[k++] = *aa++;
4465       }
4466       /* off-diagonal portion of A */
4467       for (j=jo; j<ncols_o; j++) {
4468         cj[k]   = cmap[*bj++];
4469         ca[k++] = *ba++;
4470       }
4471     }
4472     /* put together the new matrix */
4473     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4474     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4475     /* Since these are PETSc arrays, change flags to free them as necessary. */
4476     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4477     mat->free_a  = PETSC_TRUE;
4478     mat->free_ij = PETSC_TRUE;
4479     mat->nonew   = 0;
4480   } else if (scall == MAT_REUSE_MATRIX){
4481     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4482     ci = mat->i; cj = mat->j; cam = mat->a;
4483     for (i=0; i<am; i++) {
4484       /* off-diagonal portion of A */
4485       ncols_o = bi[i+1] - bi[i];
4486       for (jo=0; jo<ncols_o; jo++) {
4487         col = cmap[*bj];
4488         if (col >= cstart) break;
4489         *cam++ = *ba++; bj++;
4490       }
4491       /* diagonal portion of A */
4492       ncols_d = ai[i+1] - ai[i];
4493       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4494       /* off-diagonal portion of A */
4495       for (j=jo; j<ncols_o; j++) {
4496         *cam++ = *ba++; bj++;
4497       }
4498     }
4499   } else {
4500     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4501   }
4502 
4503   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4504   PetscFunctionReturn(0);
4505 }
4506 
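/*
   A minimal usage sketch (assuming A is an assembled MPIAIJ matrix): the first
   call creates A_loc; the second refreshes its numerical values in place after
   A has changed.

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/
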
4507 #undef __FUNCT__
4508 #define __FUNCT__ "MatGetLocalMatCondensed"
4509 /*@C
4510      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and NON-ZERO columns of A
4511 
4512     Not Collective
4513 
4514    Input Parameters:
4515 +    A - the matrix
4516 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4517 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4518 
4519    Output Parameter:
4520 .    A_loc - the local sequential matrix generated
4521 
4522     Level: developer
4523 
4524 @*/
4525 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4526 {
4527   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4528   PetscErrorCode    ierr;
4529   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4530   IS                isrowa,iscola;
4531   Mat               *aloc;
4532 
4533   PetscFunctionBegin;
4534   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4535   if (!row){
4536     start = A->rmap->rstart; end = A->rmap->rend;
4537     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4538   } else {
4539     isrowa = *row;
4540   }
4541   if (!col){
4542     start = A->cmap->rstart;
4543     cmap  = a->garray;
4544     nzA   = a->A->cmap->n;
4545     nzB   = a->B->cmap->n;
4546     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4547     ncols = 0;
4548     for (i=0; i<nzB; i++) {
4549       if (cmap[i] < start) idx[ncols++] = cmap[i];
4550       else break;
4551     }
4552     imark = i;
4553     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4554     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4555     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4556     ierr = PetscFree(idx);CHKERRQ(ierr);
4557   } else {
4558     iscola = *col;
4559   }
4560   if (scall != MAT_INITIAL_MATRIX){
4561     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4562     aloc[0] = *A_loc;
4563   }
4564   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4565   *A_loc = aloc[0];
4566   ierr = PetscFree(aloc);CHKERRQ(ierr);
4567   if (!row){
4568     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4569   }
4570   if (!col){
4571     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4572   }
4573   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4574   PetscFunctionReturn(0);
4575 }
4576 
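/*
   A minimal usage sketch (assuming A is an assembled MPIAIJ matrix; passing
   PETSC_NULL for row and col selects all local rows and the nonzero columns
   automatically):

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/
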
4577 #undef __FUNCT__
4578 #define __FUNCT__ "MatGetBrowsOfAcols"
4579 /*@C
4580     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4581 
4582     Collective on Mat
4583 
4584    Input Parameters:
4585 +    A,B - the matrices in mpiaij format
4586 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4587 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4588 
4589    Output Parameter:
4590 +    rowb, colb - index sets of rows and columns of B to extract
4591 .    brstart - index of the first row of B_seq taken from B's own local rows (the next B->rmap->n rows of B_seq)
4592 -    B_seq - the sequential matrix generated
4593 
4594     Level: developer
4595 
4596 @*/
4597 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4598 {
4599   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4600   PetscErrorCode    ierr;
4601   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4602   IS                isrowb,iscolb;
4603   Mat               *bseq;
4604 
4605   PetscFunctionBegin;
4606   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4607     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4608   }
4609   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4610 
4611   if (scall == MAT_INITIAL_MATRIX){
4612     start = A->cmap->rstart;
4613     cmap  = a->garray;
4614     nzA   = a->A->cmap->n;
4615     nzB   = a->B->cmap->n;
4616     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4617     ncols = 0;
4618     for (i=0; i<nzB; i++) {  /* row < local row index */
4619       if (cmap[i] < start) idx[ncols++] = cmap[i];
4620       else break;
4621     }
4622     imark = i;
4623     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4624     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4625     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4626     ierr = PetscFree(idx);CHKERRQ(ierr);
4627     *brstart = imark;
4628     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4629   } else {
4630     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4631     isrowb = *rowb; iscolb = *colb;
4632     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4633     bseq[0] = *B_seq;
4634   }
4635   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4636   *B_seq = bseq[0];
4637   ierr = PetscFree(bseq);CHKERRQ(ierr);
4638   if (!rowb){
4639     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4640   } else {
4641     *rowb = isrowb;
4642   }
4643   if (!colb){
4644     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4645   } else {
4646     *colb = iscolb;
4647   }
4648   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4649   PetscFunctionReturn(0);
4650 }
4651 
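/*
   A minimal usage sketch (assuming A and B are MPIAIJ matrices with compatible
   local dimensions; the index sets and B_seq created by the first call are
   reused by the second and must be destroyed by the caller):

     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/
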
4652 #undef __FUNCT__
4653 #define __FUNCT__ "MatGetBrowsOfAoCols"
4654 /*@C
4655     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to
4656     the nonzero columns of the OFF-DIAGONAL portion of the local part of A
4657 
4658     Collective on Mat
4659 
4660    Input Parameters:
4661 +    A,B - the matrices in mpiaij format
4662 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4663 .    startsj - starting points in B's send buffers for the j- and a-arrays, saved for MAT_REUSE (or PETSC_NULL)
4664 .    startsj_r - similar to startsj, for the receive buffers
4665 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4666 
4667    Output Parameter:
4668 .    B_oth - the sequential matrix generated
4669 
4670     Level: developer
4671 
4672 @*/
4673 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4674 {
4675   VecScatter_MPI_General *gen_to,*gen_from;
4676   PetscErrorCode         ierr;
4677   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4678   Mat_SeqAIJ             *b_oth;
4679   VecScatter             ctx=a->Mvctx;
4680   MPI_Comm               comm=((PetscObject)ctx)->comm;
4681   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4682   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4683   PetscScalar            *rvalues,*svalues;
4684   MatScalar              *b_otha,*bufa,*bufA;
4685   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4686   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4687   MPI_Status             *sstatus,rstatus;
4688   PetscMPIInt            jj;
4689   PetscInt               *cols,sbs,rbs;
4690   PetscScalar            *vals;
4691 
4692   PetscFunctionBegin;
4693   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4694     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4695   }
4696   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4697   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4698 
4699   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4700   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4701   rvalues  = gen_from->values; /* holds the length of receiving row */
4702   svalues  = gen_to->values;   /* holds the length of sending row */
4703   nrecvs   = gen_from->n;
4704   nsends   = gen_to->n;
4705 
4706   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4707   srow     = gen_to->indices;   /* local row index to be sent */
4708   sstarts  = gen_to->starts;
4709   sprocs   = gen_to->procs;
4710   sstatus  = gen_to->sstatus;
4711   sbs      = gen_to->bs;
4712   rstarts  = gen_from->starts;
4713   rprocs   = gen_from->procs;
4714   rbs      = gen_from->bs;
4715 
4716   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4717   if (scall == MAT_INITIAL_MATRIX){
4718     /* i-array */
4719     /*---------*/
4720     /*  post receives */
4721     for (i=0; i<nrecvs; i++){
4722       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4723       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4724       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4725     }
4726 
4727     /* pack the outgoing message */
4728     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4729     sstartsj[0] = 0;  rstartsj[0] = 0;
4730     len = 0; /* total length of j or a array to be sent */
4731     k = 0;
4732     for (i=0; i<nsends; i++){
4733       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4734       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4735       for (j=0; j<nrows; j++) {
4736         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4737         for (l=0; l<sbs; l++){
4738           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4739           rowlen[j*sbs+l] = ncols;
4740           len += ncols;
4741           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4742         }
4743         k++;
4744       }
4745       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4746       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4747     }
4748     /* recvs and sends of i-array are completed */
4749     i = nrecvs;
4750     while (i--) {
4751       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4752     }
4753     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4754 
4755     /* allocate buffers for sending j and a arrays */
4756     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4757     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4758 
4759     /* create i-array of B_oth */
4760     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4761     b_othi[0] = 0;
4762     len = 0; /* total length of j or a array to be received */
4763     k = 0;
4764     for (i=0; i<nrecvs; i++){
4765       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4766       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4767       for (j=0; j<nrows; j++) {
4768         b_othi[k+1] = b_othi[k] + rowlen[j];
4769         len += rowlen[j]; k++;
4770       }
4771       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4772     }
4773 
4774     /* allocate space for j and a arrays of B_oth */
4775     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4776     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4777 
4778     /* j-array */
4779     /*---------*/
4780     /*  post receives of j-array */
4781     for (i=0; i<nrecvs; i++){
4782       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4783       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4784     }
4785 
4786     /* pack the outgoing message j-array */
4787     k = 0;
4788     for (i=0; i<nsends; i++){
4789       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4790       bufJ = bufj+sstartsj[i];
4791       for (j=0; j<nrows; j++) {
4792         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4793         for (ll=0; ll<sbs; ll++){
4794           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4795           for (l=0; l<ncols; l++){
4796             *bufJ++ = cols[l];
4797           }
4798           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4799         }
4800       }
4801       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4802     }
4803 
4804     /* recvs and sends of j-array are completed */
4805     i = nrecvs;
4806     while (i--) {
4807       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4808     }
4809     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4810   } else if (scall == MAT_REUSE_MATRIX){
4811     sstartsj = *startsj;
4812     rstartsj = *startsj_r;
4813     bufa     = *bufa_ptr;
4814     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4815     b_otha   = b_oth->a;
4816   } else {
4817     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4818   }
4819 
4820   /* a-array */
4821   /*---------*/
4822   /*  post receives of a-array */
4823   for (i=0; i<nrecvs; i++){
4824     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4825     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4826   }
4827 
4828   /* pack the outgoing message a-array */
4829   k = 0;
4830   for (i=0; i<nsends; i++){
4831     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4832     bufA = bufa+sstartsj[i];
4833     for (j=0; j<nrows; j++) {
4834       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4835       for (ll=0; ll<sbs; ll++){
4836         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4837         for (l=0; l<ncols; l++){
4838           *bufA++ = vals[l];
4839         }
4840         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4841       }
4842     }
4843     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4844   }
4845   /* recvs and sends of a-array are completed */
4846   i = nrecvs;
4847   while (i--) {
4848     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4849   }
4850   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4851   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4852 
4853   if (scall == MAT_INITIAL_MATRIX){
4854     /* put together the new matrix */
4855     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4856 
4857     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4858     /* Since these are PETSc arrays, change flags to free them as necessary. */
4859     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4860     b_oth->free_a  = PETSC_TRUE;
4861     b_oth->free_ij = PETSC_TRUE;
4862     b_oth->nonew   = 0;
4863 
4864     ierr = PetscFree(bufj);CHKERRQ(ierr);
4865     if (!startsj || !bufa_ptr){
4866       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4867       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself, not the caller's pointer */
4868     } else {
4869       *startsj   = sstartsj;
4870       *startsj_r = rstartsj;
4871       *bufa_ptr  = bufa;
4872     }
4873   }
4874   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4875   PetscFunctionReturn(0);
4876 }
4877 
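/*
   A minimal usage sketch (assuming A and B are MPIAIJ matrices with compatible
   local dimensions; the buffers saved by the first call let the second call
   skip the symbolic communication):

     PetscInt  *startsj,*startsj_r;
     MatScalar *bufa;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/
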
4878 #undef __FUNCT__
4879 #define __FUNCT__ "MatGetCommunicationStructs"
4880 /*@C
4881   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4882 
4883   Not Collective
4884 
4885   Input Parameters:
4886 . A - The matrix in mpiaij format
4887 
4888   Output Parameter:
4889 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4890 . colmap - A map from global column index to local index into lvec
4891 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4892 
4893   Level: developer
4894 
4895 @*/
4896 #if defined (PETSC_USE_CTABLE)
4897 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4898 #else
4899 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4900 #endif
4901 {
4902   Mat_MPIAIJ *a;
4903 
4904   PetscFunctionBegin;
4905   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4906   PetscValidPointer(lvec, 2);
4907   PetscValidPointer(colmap, 3);
4908   PetscValidPointer(multScatter, 4);
4909   a = (Mat_MPIAIJ *) A->data;
4910   if (lvec) *lvec = a->lvec;
4911   if (colmap) *colmap = a->colmap;
4912   if (multScatter) *multScatter = a->Mvctx;
4913   PetscFunctionReturn(0);
4914 }
4915 
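/*
   A minimal usage sketch (assuming A is an assembled MPIAIJ matrix and a build
   without PETSC_USE_CTABLE, so colmap is a PetscInt array; the returned
   objects are references into A and must not be destroyed by the caller):

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/
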
4916 EXTERN_C_BEGIN
4917 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4918 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4919 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4920 EXTERN_C_END
4921 
4922 #undef __FUNCT__
4923 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4924 /*
4925     Computes (B'*A')' since computing B*A directly is untenable
4926 
4927                n                       p                          p
4928         (              )       (              )         (                  )
4929       m (      A       )  *  n (       B      )   =   m (         C        )
4930         (              )       (              )         (                  )
4931 
4932 */
4933 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4934 {
4935   PetscErrorCode     ierr;
4936   Mat                At,Bt,Ct;
4937 
4938   PetscFunctionBegin;
4939   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4940   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4941   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4942   ierr = MatDestroy(At);CHKERRQ(ierr);
4943   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4944   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4945   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4946   PetscFunctionReturn(0);
4947 }
4948 
4949 #undef __FUNCT__
4950 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4951 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4952 {
4953   PetscErrorCode ierr;
4954   PetscInt       m=A->rmap->n,n=B->cmap->n;
4955   Mat            Cmat;
4956 
4957   PetscFunctionBegin;
4958   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4959   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4960   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4961   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4962   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4963   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4964   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4965   *C   = Cmat;
4966   PetscFunctionReturn(0);
4967 }
4968 
4969 /* ----------------------------------------------------------------*/
4970 #undef __FUNCT__
4971 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4972 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4973 {
4974   PetscErrorCode ierr;
4975 
4976   PetscFunctionBegin;
4977   if (scall == MAT_INITIAL_MATRIX){
4978     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4979   }
4980   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4981   PetscFunctionReturn(0);
4982 }
4983 
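/*
   A minimal sketch of triggering the routine above through the generic
   interface (assuming A is MATMPIDENSE and B is MATMPIAIJ with
   A->cmap->n == B->rmap->n; the fill argument is not used by the dense
   symbolic routine):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/
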
4984 EXTERN_C_BEGIN
4985 #if defined(PETSC_HAVE_MUMPS)
4986 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
4987 #endif
4988 #if defined(PETSC_HAVE_PASTIX)
4989 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4990 #endif
4991 #if defined(PETSC_HAVE_SUPERLU_DIST)
4992 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4993 #endif
4994 #if defined(PETSC_HAVE_SPOOLES)
4995 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4996 #endif
4997 EXTERN_C_END
4998 
4999 /*MC
5000    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5001 
5002    Options Database Keys:
5003 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5004 
5005   Level: beginner
5006 
5007 .seealso: MatCreateMPIAIJ()
5008 M*/
5009 
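/*
   A typical creation sequence that selects this type at runtime (a sketch;
   n is the global problem size and the program is run with -mat_type mpiaij):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/
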
5010 EXTERN_C_BEGIN
5011 #undef __FUNCT__
5012 #define __FUNCT__ "MatCreate_MPIAIJ"
5013 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5014 {
5015   Mat_MPIAIJ     *b;
5016   PetscErrorCode ierr;
5017   PetscMPIInt    size;
5018 
5019   PetscFunctionBegin;
5020   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5021 
5022   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5023   B->data         = (void*)b;
5024   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5025   B->rmap->bs     = 1;
5026   B->assembled    = PETSC_FALSE;
5027   B->mapping      = 0;
5028 
5029   B->insertmode   = NOT_SET_VALUES;
5030   b->size         = size;
5031   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5032 
5033   /* build cache for off array entries formed */
5034   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5035   b->donotstash  = PETSC_FALSE;
5036   b->colmap      = 0;
5037   b->garray      = 0;
5038   b->roworiented = PETSC_TRUE;
5039 
5040   /* stuff used for matrix vector multiply */
5041   b->lvec      = PETSC_NULL;
5042   b->Mvctx     = PETSC_NULL;
5043 
5044   /* stuff for MatGetRow() */
5045   b->rowindices   = 0;
5046   b->rowvalues    = 0;
5047   b->getrowactive = PETSC_FALSE;
5048 
5049 #if defined(PETSC_HAVE_SPOOLES)
5050   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5051                                      "MatGetFactor_mpiaij_spooles",
5052                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5053 #endif
5054 #if defined(PETSC_HAVE_MUMPS)
5055   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5056                                      "MatGetFactor_aij_mumps",
5057                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5058 #endif
5059 #if defined(PETSC_HAVE_PASTIX)
5060   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5061 					   "MatGetFactor_mpiaij_pastix",
5062 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5063 #endif
5064 #if defined(PETSC_HAVE_SUPERLU_DIST)
5065   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5066                                      "MatGetFactor_mpiaij_superlu_dist",
5067                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5068 #endif
5069   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5070                                      "MatStoreValues_MPIAIJ",
5071                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5072   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5073                                      "MatRetrieveValues_MPIAIJ",
5074                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5075   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5076 				     "MatGetDiagonalBlock_MPIAIJ",
5077                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5078   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5079 				     "MatIsTranspose_MPIAIJ",
5080 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5081   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5082 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5083 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5084   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5085 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5086 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5087   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5088 				     "MatDiagonalScaleLocal_MPIAIJ",
5089 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5090   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5091                                      "MatConvert_MPIAIJ_MPICSRPERM",
5092                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5093   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5094                                      "MatConvert_MPIAIJ_MPICRL",
5095                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5096   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5097                                      "MatConvert_MPIAIJ_MPISBAIJ",
5098                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5099   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5100                                      "MatMatMult_MPIDense_MPIAIJ",
5101                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5102   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5103                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5104                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5105   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5106                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5107                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5108   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5109   PetscFunctionReturn(0);
5110 }
5111 EXTERN_C_END
5112 
5113 #undef __FUNCT__
5114 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5115 /*@
5116      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5117          and "off-diagonal" part of the matrix in CSR format.
5118 
5119    Collective on MPI_Comm
5120 
5121    Input Parameters:
5122 +  comm - MPI communicator
5123 .  m - number of local rows (Cannot be PETSC_DECIDE)
5124 .  n - This value should be the same as the local size used in creating the
5125        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5126        it calculated if N is given). For square matrices n is almost always m.
5127 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5128 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5129 .   i - row indices for "diagonal" portion of matrix
5130 .   j - column indices
5131 .   a - matrix values
5132 .   oi - row indices for "off-diagonal" portion of matrix
5133 .   oj - column indices
5134 -   oa - matrix values
5135 
5136    Output Parameter:
5137 .   mat - the matrix
5138 
5139    Level: advanced
5140 
5141    Notes:
5142        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5143 
5144        The i and j indices are 0 based
5145 
5146        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5147 
5148        This sets local rows and cannot be used to set off-processor values.
5149 
5150        You cannot later use MatSetValues() to change values in this matrix.
5151 
5152 .keywords: matrix, aij, compressed row, sparse, parallel
5153 
5154 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5155           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5156 @*/
5157 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5158 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5159 {
5160   PetscErrorCode ierr;
5161   Mat_MPIAIJ     *maij;
5162 
5163   PetscFunctionBegin;
5164   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5165   if (i[0]) {
5166     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5167   }
5168   if (oi[0]) {
5169     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5170   }
5171   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5172   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5173   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5174   maij = (Mat_MPIAIJ*) (*mat)->data;
5175   maij->donotstash     = PETSC_TRUE;
5176   (*mat)->preallocated = PETSC_TRUE;
5177 
5178   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5179   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5180   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5181   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5182 
5183   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5184   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5185 
5186   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5187   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5188   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5189   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5190 
5191   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5192   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5193   PetscFunctionReturn(0);
5194 }
5195 
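/*
   A minimal usage sketch (assuming two processes, each owning one row of the
   2x2 identity; the off-diagonal part is empty, and all arrays must outlive
   the matrix since they are not copied):

     PetscInt    ii[2] = {0,1},jj[1] = {0},oi[2] = {0,0},oj[1];
     PetscScalar aa[1] = {1.0},oa[1];
     Mat         mat;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,oi,oj,oa,&mat);CHKERRQ(ierr);
*/
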
5196 /*
5197     Special version for direct calls from Fortran
5198 */
5199 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5200 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5201 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5202 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5203 #endif
5204 
5205 /* Change these macros so they can be used in a void function */
5206 #undef CHKERRQ
5207 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5208 #undef SETERRQ2
5209 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5210 #undef SETERRQ
5211 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5212 
5213 EXTERN_C_BEGIN
5214 #undef __FUNCT__
5215 #define __FUNCT__ "matsetvaluesmpiaij_"
5216 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5217 {
5218   Mat             mat = *mmat;
5219   PetscInt        m = *mm, n = *mn;
5220   InsertMode      addv = *maddv;
5221   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5222   PetscScalar     value;
5223   PetscErrorCode  ierr;
5224 
5225   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5226   if (mat->insertmode == NOT_SET_VALUES) {
5227     mat->insertmode = addv;
5228   }
5229 #if defined(PETSC_USE_DEBUG)
5230   else if (mat->insertmode != addv) {
5231     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5232   }
5233 #endif
5234   {
5235   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5236   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5237   PetscTruth      roworiented = aij->roworiented;
5238 
5239   /* Some Variables required in the macro */
5240   Mat             A = aij->A;
5241   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5242   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5243   MatScalar       *aa = a->a;
5244   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5245   Mat             B = aij->B;
5246   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5247   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5248   MatScalar       *ba = b->a;
5249 
5250   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5251   PetscInt        nonew = a->nonew;
5252   MatScalar       *ap1,*ap2;
5253 
5254   PetscFunctionBegin;
5255   for (i=0; i<m; i++) {
5256     if (im[i] < 0) continue;
5257 #if defined(PETSC_USE_DEBUG)
5258     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5259 #endif
5260     if (im[i] >= rstart && im[i] < rend) {
5261       row      = im[i] - rstart;
5262       lastcol1 = -1;
5263       rp1      = aj + ai[row];
5264       ap1      = aa + ai[row];
5265       rmax1    = aimax[row];
5266       nrow1    = ailen[row];
5267       low1     = 0;
5268       high1    = nrow1;
5269       lastcol2 = -1;
5270       rp2      = bj + bi[row];
5271       ap2      = ba + bi[row];
5272       rmax2    = bimax[row];
5273       nrow2    = bilen[row];
5274       low2     = 0;
5275       high2    = nrow2;
5276 
5277       for (j=0; j<n; j++) {
5278         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5279         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5280         if (in[j] >= cstart && in[j] < cend){
5281           col = in[j] - cstart;
5282           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5283         } else if (in[j] < 0) continue;
5284 #if defined(PETSC_USE_DEBUG)
5285         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5286 #endif
5287         else {
5288           if (mat->was_assembled) {
5289             if (!aij->colmap) {
5290               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5291             }
5292 #if defined (PETSC_USE_CTABLE)
5293             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5294 	    col--;
5295 #else
5296             col = aij->colmap[in[j]] - 1;
5297 #endif
5298             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5299               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5300               col =  in[j];
5301               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5302               B = aij->B;
5303               b = (Mat_SeqAIJ*)B->data;
5304               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5305               rp2      = bj + bi[row];
5306               ap2      = ba + bi[row];
5307               rmax2    = bimax[row];
5308               nrow2    = bilen[row];
5309               low2     = 0;
5310               high2    = nrow2;
5311               bm       = aij->B->rmap->n;
5312               ba = b->a;
5313             }
5314           } else col = in[j];
5315           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5316         }
5317       }
5318     } else {
5319       if (!aij->donotstash) {
5320         if (roworiented) {
5321           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5322         } else {
5323           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5324         }
5325       }
5326     }
5327   }}
5328   PetscFunctionReturnVoid();
5329 }
5330 EXTERN_C_END
5331 
5332